Remove sparc64_hpc_ace nonbonded kernels
author Roland Schulz <roland.schulz@intel.com>
Wed, 12 Sep 2018 19:58:23 +0000 (12:58 -0700)
committer Szilárd Páll <pall.szilard@gmail.com>
Thu, 13 Sep 2018 14:36:59 +0000 (16:36 +0200)
It is unclear whether these kernels still work after the change to C++,
and there are no plans to test them.

Change-Id: I5fe296ec34c51d0e9d0500eef3c6c7d5cd1b4a76

124 files changed:
docs/doxygen/suppressions.txt
src/gromacs/gmxlib/nonbonded/CMakeLists.txt
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.cpp [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h [deleted file]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre [deleted file]
src/gromacs/gmxlib/nonbonded/nonbonded.cpp

index 5627a7593acb230f7f46886951f20f3df88635d3..bacbd4b012e5c91034f67533ecb8d25870144299 100644 (file)
@@ -19,7 +19,6 @@ src/gromacs/ewald/pme-simd4.h: warning: should include "pme-simd.h"
 src/gromacs/ewald/pme-spline-work.cpp: warning: includes "simd.h" unnecessarily
 src/gromacs/ewald/pme-spline-work.h: warning: includes "simd.h" unnecessarily
 src/gromacs/ewald/pme-spread.cpp: warning: includes "simd.h" unnecessarily
 src/gromacs/ewald/pme-spline-work.cpp: warning: includes "simd.h" unnecessarily
 src/gromacs/ewald/pme-spline-work.h: warning: includes "simd.h" unnecessarily
 src/gromacs/ewald/pme-spread.cpp: warning: includes "simd.h" unnecessarily
-src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: includes "simd.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h: warning: should include "simd.h"
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h: warning: should include "simd.h"
 src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h: warning: should include "simd.h"
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h: warning: should include "simd.h"
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h: warning: should include "simd.h"
 src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h: warning: should include "simd.h"
@@ -32,7 +31,6 @@ src/gromacs/mdlib/nbnxn_search_simd_4xn.h: warning: should include "simd.h"
 
 # These would be nice to fix, but can wait for later / deletion / rewrites
 src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: includes "config.h" unnecessarily
 
 # These would be nice to fix, but can wait for later / deletion / rewrites
 src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: includes "config.h" unnecessarily
-src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: should include "config.h"
 src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp: warning: includes "config.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.cpp: warning: includes "config.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h"
 src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp: warning: includes "config.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.cpp: warning: includes "config.h" unnecessarily
 src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h"
index 62c40ccd3451e88c3c2f77016bfd32e05323d799..29bae79a4275a281f57c342e04963819a920f107 100644 (file)
@@ -80,11 +80,6 @@ if((("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_256")
     file(GLOB NONBONDED_AVX_256_DOUBLE_SOURCES nb_kernel_avx_256_double/*.cpp)
 endif()
 
     file(GLOB NONBONDED_AVX_256_DOUBLE_SOURCES nb_kernel_avx_256_double/*.cpp)
 endif()
 
-if("${GMX_SIMD_ACTIVE}" STREQUAL "SPARC64_HPC_ACE" AND GMX_DOUBLE)
-    file(GLOB NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES nb_kernel_sparc64_hpc_ace_double/*.cpp)
-endif()
-
-
 # These sources will be used in the parent directory's CMakeLists.txt
 set(NONBONDED_KERNEL_SOURCES ${NONBONDED_C_SOURCES} ${NONBONDED_SSE2_SINGLE_SOURCES} ${NONBONDED_SSE4_1_SINGLE_SOURCES} ${NONBONDED_AVX_128_FMA_SINGLE_SOURCES} ${NONBONDED_AVX_256_SINGLE_SOURCES} ${NONBONDED_SSE2_DOUBLE_SOURCES} ${NONBONDED_SSE4_1_DOUBLE_SOURCES} ${NONBONDED_AVX_128_FMA_DOUBLE_SOURCES} ${NONBONDED_AVX_256_DOUBLE_SOURCES} ${NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES})
 target_sources(libgromacs_generated PRIVATE ${NONBONDED_KERNEL_SOURCES})
 # These sources will be used in the parent directory's CMakeLists.txt
 set(NONBONDED_KERNEL_SOURCES ${NONBONDED_C_SOURCES} ${NONBONDED_SSE2_SINGLE_SOURCES} ${NONBONDED_SSE4_1_SINGLE_SOURCES} ${NONBONDED_AVX_128_FMA_SINGLE_SOURCES} ${NONBONDED_AVX_256_SINGLE_SOURCES} ${NONBONDED_SSE2_DOUBLE_SOURCES} ${NONBONDED_SSE4_1_DOUBLE_SOURCES} ${NONBONDED_AVX_128_FMA_DOUBLE_SOURCES} ${NONBONDED_AVX_256_DOUBLE_SOURCES} ${NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES})
 target_sources(libgromacs_generated PRIVATE ${NONBONDED_KERNEL_SOURCES})
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h
deleted file mode 100644 (file)
index 32b1385..0000000
+++ /dev/null
@@ -1,972 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _kernelutil_sparc64_hpc_ace_double_h_
-#define _kernelutil_sparc64_hpc_ace_double_h_
-
-/* Get gmx_simd_exp_d() */
-#include "gromacs/simd/simd.h"
-#include "gromacs/simd/simd_math.h"
-
-/* Fujitsu header borrows the name from SSE2, since some instructions have aliases.
- * Environment/compiler version GM-1.2.0-17 seems to be buggy; when -Xg is
- * defined to enable GNUC extensions, this sets _ISOC99_SOURCE, which in
- * turn causes all intrinsics to be declared inline _instead_ of static. This
- * leads to duplicate symbol errors at link time.
- * To work around this we unset this before including the HPC-ACE header, and
- * reset the value afterwards.
- */
-#ifdef _ISOC99_SOURCE
-#    undef _ISOC99_SOURCE
-#    define SAVE_ISOC99_SOURCE
-#endif
-
-#include <emmintrin.h>
-
-#ifdef SAVE_ISOC99_SOURCE
-#    define _ISOC99_SOURCE
-#    undef SAVE_ISOC99_SOURCE
-#endif
-
-#define GMX_FJSP_SHUFFLE2(x, y) (((x)<<1) | (y))
-
-#define GMX_FJSP_TRANSPOSE2_V2R8(row0, row1) {           \
-        _fjsp_v2r8 __gmx_t1 = row0;                          \
-        row0           = _fjsp_unpacklo_v2r8(row0, row1);     \
-        row1           = _fjsp_unpackhi_v2r8(__gmx_t1, row1); \
-}
-
-
-static void
-gmx_fjsp_print_v2r8(const char *s, _fjsp_v2r8 a)
-{
-    double lo, hi;
-
-    _fjsp_storel_v2r8(&lo, a);
-    _fjsp_storeh_v2r8(&hi, a);
-    printf("%s: %g %g\n", s, lo, hi);
-}
-
-
-static _fjsp_v2r8
-gmx_fjsp_set1_v2r8(double d)
-{
-    return _fjsp_set_v2r8(d, d);
-}
-
-static _fjsp_v2r8
-gmx_fjsp_load1_v2r8(const double * gmx_restrict ptr)
-{
-    return gmx_fjsp_set1_v2r8(*ptr);
-}
-
-
-static int
-gmx_fjsp_any_lt_v2r8(_fjsp_v2r8 a, _fjsp_v2r8 b)
-{
-    union
-    {
-        double           d;
-        long long int    i;
-    }
-    conv;
-
-    a = _fjsp_cmplt_v2r8(a, b);
-    a = _fjsp_or_v2r8(a, _fjsp_unpackhi_v2r8(a, a));
-    _fjsp_storel_v2r8(&(conv.d), a);
-    return (conv.i != 0);
-}
-
-/* 1.0/sqrt(x) */
-static gmx_inline _fjsp_v2r8
-gmx_fjsp_invsqrt_v2r8(_fjsp_v2r8 x)
-{
-    const _fjsp_v2r8 half  = gmx_fjsp_set1_v2r8(0.5);
-    const _fjsp_v2r8 three = gmx_fjsp_set1_v2r8(3.0);
-    _fjsp_v2r8       lu    = _fjsp_rsqrta_v2r8(x);
-
-    lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three));
-    /* The HPC-ACE instruction set is only available in double precision, while
-     * single precision is typically sufficient for Gromacs. If you define
-     * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson
-     * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full
-     * double precision (53 bits). This is still clearly higher than single precision (24 bits).
-     */
-#ifndef GMX_RELAXED_DOUBLE_PRECISION
-    lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three));
-#endif
-    return _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three));
-}
-
-
-/* 1.0/x */
-static gmx_inline _fjsp_v2r8
-gmx_fjsp_inv_v2r8(_fjsp_v2r8 x)
-{
-    const _fjsp_v2r8 two  = gmx_fjsp_set1_v2r8(2.0);
-    __m128d          lu   = _fjsp_rcpa_v2r8(x);
-
-    /* Perform three N-R steps for double precision */
-    lu         = _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two));
-    /* The HPC-ACE instruction set is only available in double precision, while
-     * single precision is typically sufficient for Gromacs. If you define
-     * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson
-     * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full
-     * double precision (53 bits). This is still clearly higher than single precision (24 bits).
-     */
-#ifndef GMX_RELAXED_DOUBLE_PRECISION
-    lu         = _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two));
-#endif
-    return _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two));
-}
-
-
-static gmx_inline _fjsp_v2r8
-gmx_fjsp_calc_rsq_v2r8(_fjsp_v2r8 dx, _fjsp_v2r8 dy, _fjsp_v2r8 dz)
-{
-    return _fjsp_madd_v2r8(dx, dx, _fjsp_madd_v2r8(dy, dy, _fjsp_mul_v2r8(dz, dz)));
-}
-
-/* Normal sum of four ymm registers */
-#define gmx_fjsp_sum4_v2r8(t0, t1, t2, t3)  _fjsp_add_v2r8(_fjsp_add_v2r8(t0, t1), _fjsp_add_v2r8(t2, t3))
-
-
-
-
-
-static _fjsp_v2r8
-gmx_fjsp_load_2real_swizzle_v2r8(const double * gmx_restrict ptrA,
-                                 const double * gmx_restrict ptrB)
-{
-    return _fjsp_unpacklo_v2r8(_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA), _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB));
-}
-
-static _fjsp_v2r8
-gmx_fjsp_load_1real_v2r8(const double * gmx_restrict ptrA)
-{
-    return _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
-}
-
-
-static void
-gmx_fjsp_store_2real_swizzle_v2r8(double * gmx_restrict ptrA,
-                                  double * gmx_restrict ptrB,
-                                  _fjsp_v2r8            xmm1)
-{
-    _fjsp_v2r8 t2;
-
-    t2       = _fjsp_unpackhi_v2r8(xmm1, xmm1);
-    _fjsp_storel_v2r8(ptrA, xmm1);
-    _fjsp_storel_v2r8(ptrB, t2);
-}
-
-static void
-gmx_fjsp_store_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1)
-{
-    _fjsp_storel_v2r8(ptrA, xmm1);
-}
-
-
-/* Similar to store, but increments value in memory */
-static void
-gmx_fjsp_increment_2real_swizzle_v2r8(double * gmx_restrict ptrA,
-                                      double * gmx_restrict ptrB, _fjsp_v2r8 xmm1)
-{
-    _fjsp_v2r8 t1;
-
-    t1   = _fjsp_unpackhi_v2r8(xmm1, xmm1);
-    xmm1 = _fjsp_add_v2r8(xmm1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA));
-    t1   = _fjsp_add_v2r8(t1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB));
-    _fjsp_storel_v2r8(ptrA, xmm1);
-    _fjsp_storel_v2r8(ptrB, t1);
-}
-
-static void
-gmx_fjsp_increment_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1)
-{
-    _fjsp_v2r8 tmp;
-
-    tmp = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
-    tmp = _fjsp_add_v2r8(tmp, xmm1);
-    _fjsp_storel_v2r8(ptrA, tmp);
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_load_2pair_swizzle_v2r8(const double * gmx_restrict p1,
-                                 const double * gmx_restrict p2,
-                                 _fjsp_v2r8 * gmx_restrict   c6,
-                                 _fjsp_v2r8 * gmx_restrict   c12)
-{
-    _fjsp_v2r8 t1, t2, t3;
-
-    /* The c6/c12 array should be aligned */
-    t1   = _fjsp_load_v2r8(p1);
-    t2   = _fjsp_load_v2r8(p2);
-    *c6  = _fjsp_unpacklo_v2r8(t1, t2);
-    *c12 = _fjsp_unpackhi_v2r8(t1, t2);
-}
-
-static gmx_inline void
-gmx_fjsp_load_1pair_swizzle_v2r8(const double * gmx_restrict p1,
-                                 _fjsp_v2r8 * gmx_restrict   c6,
-                                 _fjsp_v2r8 * gmx_restrict   c12)
-{
-    *c6     = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
-    *c12    = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift,
-                                             const double * gmx_restrict xyz,
-                                             _fjsp_v2r8 * gmx_restrict   x1,
-                                             _fjsp_v2r8 * gmx_restrict   y1,
-                                             _fjsp_v2r8 * gmx_restrict   z1)
-{
-    _fjsp_v2r8 mem_xy, mem_z, mem_sxy, mem_sz;
-
-    mem_xy  = _fjsp_load_v2r8(xyz);
-    mem_z   = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz+2);
-    mem_sxy = _fjsp_load_v2r8(xyz_shift);
-    mem_sz  = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2);
-
-    mem_xy  = _fjsp_add_v2r8(mem_xy, mem_sxy);
-    mem_z   = _fjsp_add_v2r8(mem_z, mem_sz);
-
-    *x1  = _fjsp_shuffle_v2r8(mem_xy, mem_xy, GMX_FJSP_SHUFFLE2(0, 0));
-    *y1  = _fjsp_shuffle_v2r8(mem_xy, mem_xy, GMX_FJSP_SHUFFLE2(1, 1));
-    *z1  = _fjsp_shuffle_v2r8(mem_z, mem_z, GMX_FJSP_SHUFFLE2(0, 0));
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift,
-                                             const double * gmx_restrict xyz,
-                                             _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
-                                             _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
-                                             _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, sxy, sz, szx, syz;
-
-    t1  = _fjsp_load_v2r8(xyz);
-    t2  = _fjsp_load_v2r8(xyz+2);
-    t3  = _fjsp_load_v2r8(xyz+4);
-    t4  = _fjsp_load_v2r8(xyz+6);
-    t5  = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz+8);
-
-    sxy = _fjsp_load_v2r8(xyz_shift);
-    sz  = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2);
-    szx = _fjsp_shuffle_v2r8(sz, sxy, GMX_FJSP_SHUFFLE2(0, 0));
-    syz = _fjsp_shuffle_v2r8(sxy, sz, GMX_FJSP_SHUFFLE2(0, 1));
-
-    t1  = _fjsp_add_v2r8(t1, sxy);
-    t2  = _fjsp_add_v2r8(t2, szx);
-    t3  = _fjsp_add_v2r8(t3, syz);
-    t4  = _fjsp_add_v2r8(t4, sxy);
-    t5  = _fjsp_add_v2r8(t5, sz);
-
-    *x1  = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(0, 0));
-    *y1  = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(1, 1));
-    *z1  = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(0, 0));
-    *x2  = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(1, 1));
-    *y2  = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(0, 0));
-    *z2  = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(1, 1));
-    *x3  = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(0, 0));
-    *y3  = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(1, 1));
-    *z3  = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(0, 0));
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift,
-                                             const double * gmx_restrict xyz,
-                                             _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
-                                             _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
-                                             _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3,
-                                             _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6, sxy, sz, szx, syz;
-
-    t1  = _fjsp_load_v2r8(xyz);
-    t2  = _fjsp_load_v2r8(xyz+2);
-    t3  = _fjsp_load_v2r8(xyz+4);
-    t4  = _fjsp_load_v2r8(xyz+6);
-    t5  = _fjsp_load_v2r8(xyz+8);
-    t6  = _fjsp_load_v2r8(xyz+10);
-
-    sxy = _fjsp_load_v2r8(xyz_shift);
-    sz  = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2);
-    szx = _fjsp_shuffle_v2r8(sz, sxy, GMX_FJSP_SHUFFLE2(0, 0));
-    syz = _fjsp_shuffle_v2r8(sxy, sz, GMX_FJSP_SHUFFLE2(0, 1));
-
-    t1  = _fjsp_add_v2r8(t1, sxy);
-    t2  = _fjsp_add_v2r8(t2, szx);
-    t3  = _fjsp_add_v2r8(t3, syz);
-    t4  = _fjsp_add_v2r8(t4, sxy);
-    t5  = _fjsp_add_v2r8(t5, szx);
-    t6  = _fjsp_add_v2r8(t6, syz);
-
-    *x1  = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(0, 0));
-    *y1  = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(1, 1));
-    *z1  = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(0, 0));
-    *x2  = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(1, 1));
-    *y2  = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(0, 0));
-    *z2  = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(1, 1));
-    *x3  = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(0, 0));
-    *y3  = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(1, 1));
-    *z3  = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(0, 0));
-    *x4  = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(1, 1));
-    *y4  = _fjsp_shuffle_v2r8(t6, t6, GMX_FJSP_SHUFFLE2(0, 0));
-    *z4  = _fjsp_shuffle_v2r8(t6, t6, GMX_FJSP_SHUFFLE2(1, 1));
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1,
-                                      _fjsp_v2r8 * gmx_restrict x, _fjsp_v2r8 * gmx_restrict y, _fjsp_v2r8 * gmx_restrict z)
-{
-    *x            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
-    *y            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
-    *z            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2);
-}
-
-static gmx_inline void
-gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1,
-                                      _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
-                                      _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
-                                      _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3)
-{
-    *x1            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
-    *y1            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
-    *z1            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2);
-    *x2            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+3);
-    *y2            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+4);
-    *z2            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+5);
-    *x3            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+6);
-    *y3            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+7);
-    *z3            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+8);
-}
-
-static gmx_inline void
-gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1,
-                                      _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
-                                      _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
-                                      _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3,
-                                      _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4)
-{
-    *x1            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
-    *y1            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
-    *z1            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2);
-    *x2            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+3);
-    *y2            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+4);
-    *z2            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+5);
-    *x3            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+6);
-    *y3            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+7);
-    *z3            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+8);
-    *x4            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+9);
-    *y4            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+10);
-    *z4            = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+11);
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA,
-                                      const double * gmx_restrict ptrB,
-                                      _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1)
-{
-    _fjsp_v2r8 t1, t2, t3, t4;
-    t1           = _fjsp_load_v2r8(ptrA);
-    t2           = _fjsp_load_v2r8(ptrB);
-    t3           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
-    t4           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2);
-    GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
-    *x1          = t1;
-    *y1          = t2;
-    *z1          = _fjsp_unpacklo_v2r8(t3, t4);
-}
-
-static gmx_inline void
-gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB,
-                                      _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
-                                      _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
-                                      _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10;
-    t1           = _fjsp_load_v2r8(ptrA);
-    t2           = _fjsp_load_v2r8(ptrB);
-    t3           = _fjsp_load_v2r8(ptrA+2);
-    t4           = _fjsp_load_v2r8(ptrB+2);
-    t5           = _fjsp_load_v2r8(ptrA+4);
-    t6           = _fjsp_load_v2r8(ptrB+4);
-    t7           = _fjsp_load_v2r8(ptrA+6);
-    t8           = _fjsp_load_v2r8(ptrB+6);
-    t9           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8);
-    t10          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+8);
-    GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
-    GMX_FJSP_TRANSPOSE2_V2R8(t3, t4);
-    GMX_FJSP_TRANSPOSE2_V2R8(t5, t6);
-    GMX_FJSP_TRANSPOSE2_V2R8(t7, t8);
-    *x1          = t1;
-    *y1          = t2;
-    *z1          = t3;
-    *x2          = t4;
-    *y2          = t5;
-    *z2          = t6;
-    *x3          = t7;
-    *y3          = t8;
-    *z3          = _fjsp_unpacklo_v2r8(t9, t10);
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB,
-                                      _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
-                                      _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
-                                      _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3,
-                                      _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6;
-    t1           = _fjsp_load_v2r8(ptrA);
-    t2           = _fjsp_load_v2r8(ptrB);
-    t3           = _fjsp_load_v2r8(ptrA+2);
-    t4           = _fjsp_load_v2r8(ptrB+2);
-    t5           = _fjsp_load_v2r8(ptrA+4);
-    t6           = _fjsp_load_v2r8(ptrB+4);
-    GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
-    GMX_FJSP_TRANSPOSE2_V2R8(t3, t4);
-    GMX_FJSP_TRANSPOSE2_V2R8(t5, t6);
-    *x1          = t1;
-    *y1          = t2;
-    *z1          = t3;
-    *x2          = t4;
-    *y2          = t5;
-    *z2          = t6;
-    t1           = _fjsp_load_v2r8(ptrA+6);
-    t2           = _fjsp_load_v2r8(ptrB+6);
-    t3           = _fjsp_load_v2r8(ptrA+8);
-    t4           = _fjsp_load_v2r8(ptrB+8);
-    t5           = _fjsp_load_v2r8(ptrA+10);
-    t6           = _fjsp_load_v2r8(ptrB+10);
-    GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
-    GMX_FJSP_TRANSPOSE2_V2R8(t3, t4);
-    GMX_FJSP_TRANSPOSE2_V2R8(t5, t6);
-    *x3          = t1;
-    *y3          = t2;
-    *z3          = t3;
-    *x4          = t4;
-    *y4          = t5;
-    *z4          = t6;
-}
-
-
-static void
-gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA,
-                                           _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1)
-{
-    _fjsp_v2r8 t1, t2, t3;
-
-    t1           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
-    t2           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+1);
-    t3           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
-
-    t1           = _fjsp_sub_v2r8(t1, x1);
-    t2           = _fjsp_sub_v2r8(t2, y1);
-    t3           = _fjsp_sub_v2r8(t3, z1);
-    _fjsp_storel_v2r8(ptrA, t1);
-    _fjsp_storel_v2r8(ptrA+1, t2);
-    _fjsp_storel_v2r8(ptrA+2, t3);
-}
-
-static void
-gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 fscal,
-                                               _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1)
-{
-    _fjsp_v2r8 t1, t2, t3;
-
-    t1           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
-    t2           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+1);
-    t3           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
-
-    t1           = _fjsp_nmsub_v2r8(fscal, dx1, t1);
-    t2           = _fjsp_nmsub_v2r8(fscal, dy1, t2);
-    t3           = _fjsp_nmsub_v2r8(fscal, dz1, t3);
-    _fjsp_storel_v2r8(ptrA, t1);
-    _fjsp_storel_v2r8(ptrA+1, t2);
-    _fjsp_storel_v2r8(ptrA+2, t3);
-}
-
-
-static void
-gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA,
-                                           _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
-                                           _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
-                                           _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5;
-
-    t1          = _fjsp_load_v2r8(ptrA);
-    t2          = _fjsp_load_v2r8(ptrA+2);
-    t3          = _fjsp_load_v2r8(ptrA+4);
-    t4          = _fjsp_load_v2r8(ptrA+6);
-    t5          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8);
-
-    x1          = _fjsp_unpacklo_v2r8(x1, y1);
-    z1          = _fjsp_unpacklo_v2r8(z1, x2);
-    y2          = _fjsp_unpacklo_v2r8(y2, z2);
-    x3          = _fjsp_unpacklo_v2r8(x3, y3);
-    /* nothing to be done for z3 */
-
-    t1          = _fjsp_sub_v2r8(t1, x1);
-    t2          = _fjsp_sub_v2r8(t2, z1);
-    t3          = _fjsp_sub_v2r8(t3, y2);
-    t4          = _fjsp_sub_v2r8(t4, x3);
-    t5          = _fjsp_sub_v2r8(t5, z3);
-    _fjsp_storel_v2r8(ptrA, t1);
-    _fjsp_storeh_v2r8(ptrA+1, t1);
-    _fjsp_storel_v2r8(ptrA+2, t2);
-    _fjsp_storeh_v2r8(ptrA+3, t2);
-    _fjsp_storel_v2r8(ptrA+4, t3);
-    _fjsp_storeh_v2r8(ptrA+5, t3);
-    _fjsp_storel_v2r8(ptrA+6, t4);
-    _fjsp_storeh_v2r8(ptrA+7, t4);
-    _fjsp_storel_v2r8(ptrA+8, t5);
-}
-
-
-static void
-gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA,
-                                           _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
-                                           _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
-                                           _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3,
-                                           _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6;
-
-    t1          = _fjsp_load_v2r8(ptrA);
-    t2          = _fjsp_load_v2r8(ptrA+2);
-    t3          = _fjsp_load_v2r8(ptrA+4);
-    t4          = _fjsp_load_v2r8(ptrA+6);
-    t5          = _fjsp_load_v2r8(ptrA+8);
-    t6          = _fjsp_load_v2r8(ptrA+10);
-
-    x1          = _fjsp_unpacklo_v2r8(x1, y1);
-    z1          = _fjsp_unpacklo_v2r8(z1, x2);
-    y2          = _fjsp_unpacklo_v2r8(y2, z2);
-    x3          = _fjsp_unpacklo_v2r8(x3, y3);
-    z3          = _fjsp_unpacklo_v2r8(z3, x4);
-    y4          = _fjsp_unpacklo_v2r8(y4, z4);
-
-    _fjsp_storel_v2r8(ptrA,    _fjsp_sub_v2r8( t1, x1 ));
-    _fjsp_storeh_v2r8(ptrA+1,  _fjsp_sub_v2r8( t1, x1 ));
-    _fjsp_storel_v2r8(ptrA+2,  _fjsp_sub_v2r8( t2, z1 ));
-    _fjsp_storeh_v2r8(ptrA+3,  _fjsp_sub_v2r8( t2, z1 ));
-    _fjsp_storel_v2r8(ptrA+4,  _fjsp_sub_v2r8( t3, y2 ));
-    _fjsp_storeh_v2r8(ptrA+5,  _fjsp_sub_v2r8( t3, y2 ));
-    _fjsp_storel_v2r8(ptrA+6,  _fjsp_sub_v2r8( t4, x3 ));
-    _fjsp_storeh_v2r8(ptrA+7,  _fjsp_sub_v2r8( t4, x3 ));
-    _fjsp_storel_v2r8(ptrA+8,  _fjsp_sub_v2r8( t5, z3 ));
-    _fjsp_storeh_v2r8(ptrA+9,  _fjsp_sub_v2r8( t5, z3 ));
-    _fjsp_storel_v2r8(ptrA+10, _fjsp_sub_v2r8( t6, y4 ));
-    _fjsp_storeh_v2r8(ptrA+11, _fjsp_sub_v2r8( t6, y4 ));
-}
-
-static void
-gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
-                                           _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7;
-
-    t1          = _fjsp_load_v2r8(ptrA);
-    t2          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
-    t3          = _fjsp_load_v2r8(ptrB);
-    t4          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2);
-
-    t5          = _fjsp_unpacklo_v2r8(x1, y1);
-    t6          = _fjsp_unpackhi_v2r8(x1, y1);
-    t7          = _fjsp_unpackhi_v2r8(z1, z1);
-
-    t1          = _fjsp_sub_v2r8(t1, t5);
-    t2          = _fjsp_sub_v2r8(t2, z1);
-
-    t3          = _fjsp_sub_v2r8(t3, t6);
-    t4          = _fjsp_sub_v2r8(t4, t7);
-
-    _fjsp_storel_v2r8(ptrA, t1);
-    _fjsp_storeh_v2r8(ptrA+1, t1);
-    _fjsp_storel_v2r8(ptrA+2, t2);
-    _fjsp_storel_v2r8(ptrB, t3);
-    _fjsp_storeh_v2r8(ptrB+1, t3);
-    _fjsp_storel_v2r8(ptrB+2, t4);
-}
-
-
-static void
-gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
-                                               _fjsp_v2r8 fscal, _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, fscalA, fscalB;
-
-    t1          = _fjsp_load_v2r8(ptrA);
-    t2          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
-    t3          = _fjsp_load_v2r8(ptrB);
-    t4          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2);
-    fscalA      = _fjsp_unpacklo_v2r8(fscal, fscal);
-    fscalB      = _fjsp_unpackhi_v2r8(fscal, fscal);
-
-    t5          = _fjsp_unpacklo_v2r8(dx1, dy1);
-    t6          = _fjsp_unpackhi_v2r8(dx1, dy1);
-    t7          = _fjsp_unpackhi_v2r8(dz1, dz1);
-
-    t1          = _fjsp_nmsub_v2r8(fscalA, t5, t1);
-    t2          = _fjsp_nmsub_v2r8(fscalA, dz1, t2);
-
-    t3          = _fjsp_nmsub_v2r8(fscalB, t6, t3);
-    t4          = _fjsp_nmsub_v2r8(fscalB, t7, t4);
-
-    _fjsp_storel_v2r8(ptrA, t1);
-    _fjsp_storeh_v2r8(ptrA+1, t1);
-    _fjsp_storel_v2r8(ptrA+2, t2);
-    _fjsp_storel_v2r8(ptrB, t3);
-    _fjsp_storeh_v2r8(ptrB+1, t3);
-    _fjsp_storel_v2r8(ptrB+2, t4);
-}
-
-
-static void
-gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
-                                           _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
-                                           _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
-                                           _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10;
-    _fjsp_v2r8 tA, tB, tC, tD, tE, tF, tG, tH, tI;
-
-    t1          = _fjsp_load_v2r8(ptrA);
-    t2          = _fjsp_load_v2r8(ptrA+2);
-    t3          = _fjsp_load_v2r8(ptrA+4);
-    t4          = _fjsp_load_v2r8(ptrA+6);
-    t5          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8);
-    t6          = _fjsp_load_v2r8(ptrB);
-    t7          = _fjsp_load_v2r8(ptrB+2);
-    t8          = _fjsp_load_v2r8(ptrB+4);
-    t9          = _fjsp_load_v2r8(ptrB+6);
-    t10         = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+8);
-
-    tA          = _fjsp_unpacklo_v2r8(x1, y1);
-    tB          = _fjsp_unpackhi_v2r8(x1, y1);
-    tC          = _fjsp_unpacklo_v2r8(z1, x2);
-    tD          = _fjsp_unpackhi_v2r8(z1, x2);
-    tE          = _fjsp_unpacklo_v2r8(y2, z2);
-    tF          = _fjsp_unpackhi_v2r8(y2, z2);
-    tG          = _fjsp_unpacklo_v2r8(x3, y3);
-    tH          = _fjsp_unpackhi_v2r8(x3, y3);
-    tI          = _fjsp_unpackhi_v2r8(z3, z3);
-
-    t1          = _fjsp_sub_v2r8(t1, tA);
-    t2          = _fjsp_sub_v2r8(t2, tC);
-    t3          = _fjsp_sub_v2r8(t3, tE);
-    t4          = _fjsp_sub_v2r8(t4, tG);
-    t5          = _fjsp_sub_v2r8(t5, z3);
-
-    t6          = _fjsp_sub_v2r8(t6, tB);
-    t7          = _fjsp_sub_v2r8(t7, tD);
-    t8          = _fjsp_sub_v2r8(t8, tF);
-    t9          = _fjsp_sub_v2r8(t9, tH);
-    t10         = _fjsp_sub_v2r8(t10, tI);
-
-    _fjsp_storel_v2r8(ptrA, t1);
-    _fjsp_storeh_v2r8(ptrA+1, t1);
-    _fjsp_storel_v2r8(ptrA+2, t2);
-    _fjsp_storeh_v2r8(ptrA+3, t2);
-    _fjsp_storel_v2r8(ptrA+4, t3);
-    _fjsp_storeh_v2r8(ptrA+5, t3);
-    _fjsp_storel_v2r8(ptrA+6, t4);
-    _fjsp_storeh_v2r8(ptrA+7, t4);
-    _fjsp_storel_v2r8(ptrA+8, t5);
-    _fjsp_storel_v2r8(ptrB, t6);
-    _fjsp_storeh_v2r8(ptrB+1, t6);
-    _fjsp_storel_v2r8(ptrB+2, t7);
-    _fjsp_storeh_v2r8(ptrB+3, t7);
-    _fjsp_storel_v2r8(ptrB+4, t8);
-    _fjsp_storeh_v2r8(ptrB+5, t8);
-    _fjsp_storel_v2r8(ptrB+6, t9);
-    _fjsp_storeh_v2r8(ptrB+7, t9);
-    _fjsp_storel_v2r8(ptrB+8, t10);
-}
-
-
-static void
-gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
-                                           _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
-                                           _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
-                                           _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3,
-                                           _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4)
-{
-    _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12;
-    _fjsp_v2r8 tA, tB, tC, tD, tE, tF, tG, tH, tI, tJ, tK, tL;
-
-    t1          = _fjsp_load_v2r8(ptrA);
-    t2          = _fjsp_load_v2r8(ptrA+2);
-    t3          = _fjsp_load_v2r8(ptrA+4);
-    t4          = _fjsp_load_v2r8(ptrA+6);
-    t5          = _fjsp_load_v2r8(ptrA+8);
-    t6          = _fjsp_load_v2r8(ptrA+10);
-    t7          = _fjsp_load_v2r8(ptrB);
-    t8          = _fjsp_load_v2r8(ptrB+2);
-    t9          = _fjsp_load_v2r8(ptrB+4);
-    t10         = _fjsp_load_v2r8(ptrB+6);
-    t11         = _fjsp_load_v2r8(ptrB+8);
-    t12         = _fjsp_load_v2r8(ptrB+10);
-
-    tA          = _fjsp_unpacklo_v2r8(x1, y1);
-    tB          = _fjsp_unpackhi_v2r8(x1, y1);
-    tC          = _fjsp_unpacklo_v2r8(z1, x2);
-    tD          = _fjsp_unpackhi_v2r8(z1, x2);
-    tE          = _fjsp_unpacklo_v2r8(y2, z2);
-    tF          = _fjsp_unpackhi_v2r8(y2, z2);
-    tG          = _fjsp_unpacklo_v2r8(x3, y3);
-    tH          = _fjsp_unpackhi_v2r8(x3, y3);
-    tI          = _fjsp_unpacklo_v2r8(z3, x4);
-    tJ          = _fjsp_unpackhi_v2r8(z3, x4);
-    tK          = _fjsp_unpacklo_v2r8(y4, z4);
-    tL          = _fjsp_unpackhi_v2r8(y4, z4);
-
-    t1          = _fjsp_sub_v2r8(t1, tA);
-    t2          = _fjsp_sub_v2r8(t2, tC);
-    t3          = _fjsp_sub_v2r8(t3, tE);
-    t4          = _fjsp_sub_v2r8(t4, tG);
-    t5          = _fjsp_sub_v2r8(t5, tI);
-    t6          = _fjsp_sub_v2r8(t6, tK);
-
-    t7          = _fjsp_sub_v2r8(t7, tB);
-    t8          = _fjsp_sub_v2r8(t8, tD);
-    t9          = _fjsp_sub_v2r8(t9, tF);
-    t10         = _fjsp_sub_v2r8(t10, tH);
-    t11         = _fjsp_sub_v2r8(t11, tJ);
-    t12         = _fjsp_sub_v2r8(t12, tL);
-
-    _fjsp_storel_v2r8(ptrA,  t1);
-    _fjsp_storeh_v2r8(ptrA+1, t1);
-    _fjsp_storel_v2r8(ptrA+2, t2);
-    _fjsp_storeh_v2r8(ptrA+3, t2);
-    _fjsp_storel_v2r8(ptrA+4, t3);
-    _fjsp_storeh_v2r8(ptrA+5, t3);
-    _fjsp_storel_v2r8(ptrA+6, t4);
-    _fjsp_storeh_v2r8(ptrA+7, t4);
-    _fjsp_storel_v2r8(ptrA+8, t5);
-    _fjsp_storeh_v2r8(ptrA+9, t5);
-    _fjsp_storel_v2r8(ptrA+10, t6);
-    _fjsp_storeh_v2r8(ptrA+11, t6);
-    _fjsp_storel_v2r8(ptrB,  t7);
-    _fjsp_storeh_v2r8(ptrB+1, t7);
-    _fjsp_storel_v2r8(ptrB+2, t8);
-    _fjsp_storeh_v2r8(ptrB+3, t8);
-    _fjsp_storel_v2r8(ptrB+4, t9);
-    _fjsp_storeh_v2r8(ptrB+5, t9);
-    _fjsp_storel_v2r8(ptrB+6, t10);
-    _fjsp_storeh_v2r8(ptrB+7, t10);
-    _fjsp_storel_v2r8(ptrB+8, t11);
-    _fjsp_storeh_v2r8(ptrB+9, t11);
-    _fjsp_storel_v2r8(ptrB+10, t12);
-    _fjsp_storeh_v2r8(ptrB+11, t12);
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_update_iforce_1atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1,
-                                          double * gmx_restrict fptr,
-                                          double * gmx_restrict fshiftptr)
-{
-    __m128d t1, t2, t3, t4;
-
-    /* transpose data */
-    t1   = fix1;
-    fix1 = _fjsp_unpacklo_v2r8(fix1, fiy1); /* y0 x0 */
-    fiy1 = _fjsp_unpackhi_v2r8(t1, fiy1);   /* y1 x1 */
-
-    fix1 = _fjsp_add_v2r8(fix1, fiy1);
-    fiz1 = _fjsp_add_v2r8( fiz1, _fjsp_unpackhi_v2r8(fiz1, fiz1 ));
-
-    t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 );
-    _fjsp_storel_v2r8( fptr, t4 );
-    _fjsp_storeh_v2r8( fptr+1, t4 );
-    _fjsp_storel_v2r8( fptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fptr+2), fiz1 ));
-
-    t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 );
-    _fjsp_storel_v2r8( fshiftptr, t4 );
-    _fjsp_storeh_v2r8( fshiftptr+1, t4 );
-    _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 ));
-}
-
-static gmx_inline void
-gmx_fjsp_update_iforce_3atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1,
-                                          _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2,
-                                          _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3,
-                                          double * gmx_restrict fptr,
-                                          double * gmx_restrict fshiftptr)
-{
-    __m128d t1, t2, t3, t4, t5, t6;
-
-    /* transpose data */
-    GMX_FJSP_TRANSPOSE2_V2R8(fix1, fiy1);
-    GMX_FJSP_TRANSPOSE2_V2R8(fiz1, fix2);
-    GMX_FJSP_TRANSPOSE2_V2R8(fiy2, fiz2);
-    t1   = fix3;
-    fix3 = _fjsp_unpacklo_v2r8(fix3, fiy3); /* y0 x0 */
-    fiy3 = _fjsp_unpackhi_v2r8(t1, fiy3);   /* y1 x1 */
-
-    fix1 = _fjsp_add_v2r8(fix1, fiy1);
-    fiz1 = _fjsp_add_v2r8(fiz1, fix2);
-    fiy2 = _fjsp_add_v2r8(fiy2, fiz2);
-
-    fix3 = _fjsp_add_v2r8(fix3, fiy3);
-    fiz3 = _fjsp_add_v2r8( fiz3, _fjsp_unpackhi_v2r8(fiz3, fiz3));
-
-    t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 );
-    t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2), fiz1 );
-    t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4), fiy2 );
-    t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6), fix3 );
-
-    _fjsp_storel_v2r8( fptr,   t3 );
-    _fjsp_storeh_v2r8( fptr+1, t3 );
-    _fjsp_storel_v2r8( fptr+2, t4 );
-    _fjsp_storeh_v2r8( fptr+3, t4 );
-    _fjsp_storel_v2r8( fptr+4, t5 );
-    _fjsp_storeh_v2r8( fptr+5, t5 );
-    _fjsp_storel_v2r8( fptr+6, t6 );
-    _fjsp_storeh_v2r8( fptr+7, t6 );
-    _fjsp_storel_v2r8( fptr+8, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fptr+8), fiz3 ));
-
-    fix1 = _fjsp_add_v2r8(fix1, fix3);
-    t1   = _fjsp_shuffle_v2r8(fiz1, fiy2, GMX_FJSP_SHUFFLE2(0, 1));
-    fix1 = _fjsp_add_v2r8(fix1, t1); /* x and y sums */
-
-    t2   = _fjsp_shuffle_v2r8(fiy2, fiy2, GMX_FJSP_SHUFFLE2(1, 1));
-    fiz1 = _fjsp_add_v2r8(fiz1, fiz3);
-    fiz1 = _fjsp_add_v2r8(fiz1, t2); /* z sum */
-
-    t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 );
-    _fjsp_storel_v2r8( fshiftptr, t3 );
-    _fjsp_storeh_v2r8( fshiftptr+1, t3 );
-    _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 ));
-}
-
-
-static gmx_inline void
-gmx_fjsp_update_iforce_4atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1,
-                                          _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2,
-                                          _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3,
-                                          _fjsp_v2r8 fix4, _fjsp_v2r8 fiy4, _fjsp_v2r8 fiz4,
-                                          double * gmx_restrict fptr,
-                                          double * gmx_restrict fshiftptr)
-{
-    __m128d t1, t2, t3, t4, t5, t6, t7, t8;
-
-    /* transpose data */
-    GMX_FJSP_TRANSPOSE2_V2R8(fix1, fiy1);
-    GMX_FJSP_TRANSPOSE2_V2R8(fiz1, fix2);
-    GMX_FJSP_TRANSPOSE2_V2R8(fiy2, fiz2);
-    GMX_FJSP_TRANSPOSE2_V2R8(fix3, fiy3);
-    GMX_FJSP_TRANSPOSE2_V2R8(fiz3, fix4);
-    GMX_FJSP_TRANSPOSE2_V2R8(fiy4, fiz4);
-
-    fix1 = _fjsp_add_v2r8(fix1, fiy1);
-    fiz1 = _fjsp_add_v2r8(fiz1, fix2);
-    fiy2 = _fjsp_add_v2r8(fiy2, fiz2);
-    fix3 = _fjsp_add_v2r8(fix3, fiy3);
-    fiz3 = _fjsp_add_v2r8(fiz3, fix4);
-    fiy4 = _fjsp_add_v2r8(fiy4, fiz4);
-
-    t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr),    fix1 );
-    t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2),  fiz1 );
-    t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4),  fiy2 );
-    t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6),  fix3 );
-    t7 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+8),  fiz3 );
-    t8 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+10), fiy4 );
-    _fjsp_storel_v2r8( fptr,    t3 );
-    _fjsp_storeh_v2r8( fptr+1,  t3 );
-    _fjsp_storel_v2r8( fptr+2,  t4 );
-    _fjsp_storeh_v2r8( fptr+3,  t4 );
-    _fjsp_storel_v2r8( fptr+4,  t5 );
-    _fjsp_storeh_v2r8( fptr+5,  t5 );
-    _fjsp_storel_v2r8( fptr+6,  t6 );
-    _fjsp_storeh_v2r8( fptr+7,  t6 );
-    _fjsp_storel_v2r8( fptr+8,  t7 );
-    _fjsp_storeh_v2r8( fptr+9,  t7 );
-    _fjsp_storel_v2r8( fptr+10, t8 );
-    _fjsp_storeh_v2r8( fptr+11, t8 );
-
-    t1   = _fjsp_shuffle_v2r8(fiz1, fiy2, GMX_FJSP_SHUFFLE2(0, 1));
-    fix1 = _fjsp_add_v2r8(fix1, t1);
-    t2   = _fjsp_shuffle_v2r8(fiz3, fiy4, GMX_FJSP_SHUFFLE2(0, 1));
-    fix3 = _fjsp_add_v2r8(fix3, t2);
-    fix1 = _fjsp_add_v2r8(fix1, fix3); /* x and y sums */
-
-    fiz1 = _fjsp_add_v2r8(fiz1, _fjsp_unpackhi_v2r8(fiy2, fiy2));
-    fiz3 = _fjsp_add_v2r8(fiz3, _fjsp_unpackhi_v2r8(fiy4, fiy4));
-    fiz1 = _fjsp_add_v2r8(fiz1, fiz3); /* z sum */
-
-    t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 );
-    _fjsp_storel_v2r8( fshiftptr, t3 );
-    _fjsp_storeh_v2r8( fshiftptr+1, t3 );
-    _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 ));
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_update_1pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA)
-{
-    pot1 = _fjsp_add_v2r8(pot1, _fjsp_unpackhi_v2r8(pot1, pot1));
-    _fjsp_storel_v2r8(ptrA, _fjsp_add_v2r8(pot1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA)));
-}
-
-static gmx_inline void
-gmx_fjsp_update_2pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA,
-                          _fjsp_v2r8 pot2, double * gmx_restrict ptrB)
-{
-    GMX_FJSP_TRANSPOSE2_V2R8(pot1, pot2);
-    pot1 = _fjsp_add_v2r8(pot1, pot2);
-    pot2 = _fjsp_unpackhi_v2r8(pot1, pot1);
-
-    _fjsp_storel_v2r8(ptrA, _fjsp_add_v2r8(pot1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA)));
-    _fjsp_storel_v2r8(ptrB, _fjsp_add_v2r8(pot2, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB)));
-}
-
-
-#endif /* _kernelutil_sparc64_hpc_ace_double_h_ */
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py
deleted file mode 100755 (executable)
index d49e1ca..0000000
+++ /dev/null
@@ -1,513 +0,0 @@
-#!/usr/bin/env python2
-#
-# This file is part of the GROMACS molecular simulation package.
-#
-# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
-# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
-# and including many others, as listed in the AUTHORS file in the
-# top-level source directory and at http://www.gromacs.org.
-#
-# GROMACS is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# GROMACS is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with GROMACS; if not, see
-# http://www.gnu.org/licenses, or write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
-#
-# If you want to redistribute modifications to GROMACS, please
-# consider that scientific software is very special. Version
-# control is crucial - bugs must be traceable. We will be happy to
-# consider code for inclusion in the official distribution, but
-# derived work must not be called official GROMACS. Details are found
-# in the README & COPYING files - if they are missing, get the
-# official version at http://www.gromacs.org.
-#
-# To help us fund GROMACS development, we humbly ask that you cite
-# the research papers on the package. Check out http://www.gromacs.org.
-
-import sys
-import os
-sys.path.append("../preprocessor")
-sys.path.append("../../../../../admin")
-from copyright import create_copyright_header
-from gmxpreprocess import gmxpreprocess
-
-# "The happiest programs are programs that write other programs."
-#
-#
-# This script controls the generation of Gromacs nonbonded kernels.
-#
-# We no longer generate kernels on-the-fly, so this file is not run
-# during a Gromacs compile - only when we need to update the kernels (=rarely).
-#
-# To maximize performance, each combination of interactions in Gromacs
-# has a separate nonbonded kernel without conditionals in the code.
-# To avoid writing hundreds of different routines for each architecture,
-# we instead use a custom preprocessor so we can encode the conditionals
-# and expand for-loops (e.g, for water-water interactions)
-# from a general kernel template. While that file will contain quite a
-# few preprocessor directives, it is still an order of magnitude easier
-# to maintain than ~200 different kernels (not to mention it avoids bugs).
-#
-# To actually generate the kernels, this program iteratively calls the
-# preprocessor with different define settings corresponding to all
-# combinations of coulomb/van-der-Waals/geometry options.
-#
-# A main goal in the design was to make this new generator _general_. For
-# this reason we have used a lot of different fields to identify a particular
-# kernel and interaction. Basically, each kernel will have a name like
-#
-# nbkernel_ElecXX_VdwYY_GeomZZ_VF_QQ()
-#
-# Where XX/YY/ZZ/VF are strings to identify what the kernel computes.
-#
-# Elec/Vdw describe the type of interaction for electrostatics and van der Waals.
-# The geometry settings correspond e.g. to water-water or water-particle kernels,
-# and finally the VF setting is V,F,or VF depending on whether we calculate
-# only the potential, only the force, or both of them. The final string (QQ)
-# is the architecture/language/optimization of the kernel.
-#
-Arch       = 'sparc64_hpc_ace_double'
-
-# Explanation of the 'properties':
-#
-# It is cheap to compute r^2, and the kernels require various other functions of r for
-# different kinds of interaction. Depending on the needs of the kernel and the available
-# processor instructions, this will be done in different ways.
-#
-# 'rinv' means we need 1/r, which is calculated as 1/sqrt(r^2).
-# 'rinvsq' means we need 1/(r*r). This is calculated as rinv*rinv if we already did rinv, otherwise 1/r^2.
-# 'r' is similarly calculated as r^2*rinv when needed
-# 'table' means the interaction is tabulated, in which case we will calculate a table index before the interaction
-# 'shift' means the interaction will be modified by a constant to make it zero at the cutoff.
-# 'cutoff' means the interaction is set to 0.0 outside the cutoff
-#
-
-FileHeader = create_copyright_header('2012,2013,2014,2015,2017,2018')
-FileHeader += """/*
- * Note: this file was generated by the GROMACS """+Arch+""" kernel generator.
- */
-"""
-
-###############################################
-# ELECTROSTATICS
-# Interactions and flags for them
-###############################################
-ElectrostaticsList = {
-    'None'                    : [],
-    'Coulomb'                 : ['rinv','rinvsq'],
-    'ReactionField'           : ['rinv','rinvsq'],
-    'CubicSplineTable'        : ['rinv','r','table'],
-    'Ewald'                   : ['rinv','rinvsq','r'],
-}
-
-
-###############################################
-# VAN DER WAALS
-# Interactions and flags for them
-###############################################
-VdwList = {
-    'None'                    : [],
-    'LennardJones'            : ['rinvsq'],
-#    'Buckingham'              : ['rinv','rinvsq','r'], # Disabled for sse4.1 to reduce number of kernels and simply the template
-    'CubicSplineTable'        : ['rinv','r','table'],
-    'LJEwald'                 : ['rinv','rinvsq','r'],
-}
-
-
-###############################################
-# MODIFIERS
-# Different ways to adjust/modify interactions to conserve energy
-###############################################
-ModifierList = {
-    'None'                    : [],
-    'ExactCutoff'             : ['exactcutoff'],        # Zero the interaction outside the cutoff, used for reaction-field-zero
-    'PotentialShift'          : ['shift','exactcutoff'],
-    'PotentialSwitch'         : ['rinv','r','switch','exactcutoff']
-}
-
-
-###############################################
-# GEOMETRY COMBINATIONS
-###############################################
-GeometryNameList = [
-    [ 'Particle' , 'Particle' ],
-    [ 'Water3'   , 'Particle' ],
-    [ 'Water3'   , 'Water3'   ],
-    [ 'Water4'   , 'Particle' ],
-    [ 'Water4'   , 'Water4'   ]
-]
-
-
-###############################################
-# POTENTIAL / FORCE
-###############################################
-VFList = [
-    'PotentialAndForce',
-# 'Potential',   # Not used yet
-    'Force'
-]
-
-
-###############################################
-# GEOMETRY PROPERTIES
-###############################################
-# Dictionaries with lists telling which interactions are present
-# 1,2,3 means particles 1,2,3 (but not 0) have electrostatics!
-GeometryElectrostatics = {
-    'Particle'  : [ 0 ],
-    'Particle2' : [ 0 , 1 ],
-    'Particle3' : [ 0 , 1 , 2 ],
-    'Particle4' : [ 0 , 1 , 2 , 3 ],
-    'Water3'    : [ 0 , 1 , 2 ],
-    'Water4'    : [ 1 , 2 , 3 ]
-}
-
-GeometryVdw = {
-    'Particle'  : [ 0 ],
-    'Particle2' : [ 0 , 1 ],
-    'Particle3' : [ 0 , 1 , 2 ],
-    'Particle4' : [ 0 , 1 , 2 , 3 ],
-    'Water3'    : [ 0 ],
-    'Water4'    : [ 0 ]
-}
-
-
-
-
-# Dictionary to abbreviate all strings (mixed from all the lists)
-Abbreviation = {
-    'None'                    : 'None',
-    'Coulomb'                 : 'Coul',
-    'Ewald'                   : 'Ew',
-    'ReactionField'           : 'RF',
-    'CubicSplineTable'        : 'CSTab',
-    'LennardJones'            : 'LJ',
-    'Buckingham'              : 'Bham',
-    'LJEwald'                 : 'LJEw',
-    'PotentialShift'          : 'Sh',
-    'PotentialSwitch'         : 'Sw',
-    'ExactCutoff'             : 'Cut',
-    'PotentialAndForce'       : 'VF',
-    'Potential'               : 'V',
-    'Force'                   : 'F',
-    'Water3'                  : 'W3',
-    'Water4'                  : 'W4',
-    'Particle'                : 'P1',
-    'Particle2'               : 'P2',
-    'Particle3'               : 'P3',
-    'Particle4'               : 'P4'
-}
-
-
-###############################################
-# Functions
-###############################################
-
-# Return a string with the kernel name from current settings
-def MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom):
-    ElecStr = 'Elec' + Abbreviation[KernelElec]
-    if(KernelElecMod!='None'):
-        ElecStr = ElecStr + Abbreviation[KernelElecMod]
-    VdwStr  = 'Vdw'  + Abbreviation[KernelVdw]
-    if(KernelVdwMod!='None'):
-        VdwStr = VdwStr + Abbreviation[KernelVdwMod]
-    GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
-    return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + Arch
-
-def MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
-    ElecStr = 'Elec' + Abbreviation[KernelElec]
-    if(KernelElecMod!='None'):
-        ElecStr = ElecStr + Abbreviation[KernelElecMod]
-    VdwStr  = 'Vdw'  + Abbreviation[KernelVdw]
-    if(KernelVdwMod!='None'):
-        VdwStr = VdwStr + Abbreviation[KernelVdwMod]
-    GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
-    VFStr   = Abbreviation[KernelVF]
-    return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + VFStr + '_' + Arch
-
-# Return a string with a declaration to use for the kernel;
-# this will be a sequence of string combinations as well as the actual function name
-# Dont worry about field widths - that is just pretty-printing for the header!
-def MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF):
-    KernelStr   = '\"'+KernelName+'\"'
-    ArchStr     = '\"'+Arch+'\"'
-    ElecStr     = '\"'+KernelElec+'\"'
-    ElecModStr  = '\"'+KernelElecMod+'\"'
-    VdwStr      = '\"'+KernelVdw+'\"'
-    VdwModStr   = '\"'+KernelVdwMod+'\"'
-    GeomStr     = '\"'+KernelGeom[0]+KernelGeom[1]+'\"'
-    OtherStr    = '\"'+KernelOther+'\"'
-    VFStr       = '\"'+KernelVF+'\"'
-
-    ThisSpec = ArchStr+', '+ElecStr+', '+ElecModStr+', '+VdwStr+', '+VdwModStr+', '+GeomStr+', '+OtherStr+', '+VFStr
-    ThisDecl = '    { '+KernelName+', '+KernelStr+', '+ThisSpec+' }'
-    return ThisDecl
-
-
-# Returns 1 if this kernel should be created, 0 if we should skip it
-# This routine is not critical - it is not the end of the world if we create more kernels,
-# but since the number is pretty large we save both space and compile-time by reducing it a bit.
-def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
-
-    # No need for kernels without interactions
-    if(KernelElec=='None' and KernelVdw=='None'):
-        return 0
-
-    # No need for modifiers without interactions
-    if((KernelElec=='None' and KernelElecMod!='None') or (KernelVdw=='None' and KernelVdwMod!='None')):
-        return 0
-
-    # No need for LJ-only water optimization, or water optimization with implicit solvent.
-    if('Water' in KernelGeom[0] and KernelElec=='None'):
-        return 0
-
-    # Non-matching table settings are pointless
-    if( ('Table' in KernelElec) and ('Table' in KernelVdw) and KernelElec!=KernelVdw ):
-        return 0
-
-    # Try to reduce the number of different switch/shift options to get a reasonable number of kernels
-    # For electrostatics, reaction-field can use 'exactcutoff', and ewald can use switch or shift.
-    if(KernelElecMod=='ExactCutoff' and KernelElec!='ReactionField'):
-        return 0
-    if(KernelElecMod in ['PotentialShift','PotentialSwitch'] and KernelElec!='Ewald'):
-        return 0
-    # For Vdw, we support switch and shift for Lennard-Jones/Buckingham
-    if((KernelVdwMod=='ExactCutoff') or
-       (KernelVdwMod in ['PotentialShift','PotentialSwitch'] and KernelVdw not in ['LennardJones','Buckingham','LJEwald'])):
-        return 0
-
-    # For LJEwald, we only support shift
-    if(KernelVdw=='LJEwald' and KernelVdwMod=='PotentialSwitch'):
-        return 0
-
-    # Choose either switch or shift and don't mix them...
-    if((KernelElecMod=='PotentialShift' and KernelVdwMod=='PotentialSwitch') or
-       (KernelElecMod=='PotentialSwitch' and KernelVdwMod=='PotentialShift')):
-        return 0
-
-    # Don't use a Vdw kernel with a modifier if the electrostatics one does not have one
-    if(KernelElec!='None' and KernelElecMod=='None' and KernelVdwMod!='None'):
-        return 0
-
-    # Don't use an electrostatics kernel with a modifier if the vdw one does not have one,
-    # unless the electrostatics one is reaction-field with exact cutoff.
-    if(KernelVdw!='None' and KernelVdwMod=='None' and KernelElecMod!='None'):
-        if(KernelElec=='ReactionField' and KernelVdw!='CubicSplineTable'):
-            return 0
-        elif(KernelElec!='ReactionField'):
-            return 0
-
-    #Only do LJ-PME if we are also doing PME for electrostatics, or no electrostatics at all.
-    if(KernelVdw=='LJEwald' and KernelElec not in ['Ewald','None']):
-        return 0
-
-    return 1
-
-
-
-#
-# The preprocessor will automatically expand the interactions for water and other
-# geometries inside the kernel, but to get this right we need to setup a couple
-# of defines - we do them in a separate routine to keep the main loop clean.
-#
-# While this routine might look a bit complex it is actually quite straightforward,
-# and the best news is that you wont have to modify _anything_ for a new geometry
-# as long as you correctly define its Electrostatics/Vdw geometry in the lists above!
-#
-def SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines):
-    # What is the _name_ for the i/j group geometry?
-    igeometry            = KernelGeom[0]
-    jgeometry            = KernelGeom[1]
-    # define so we can access it in the source when the preprocessor runs
-    defines['GEOMETRY_I'] = igeometry
-    defines['GEOMETRY_J'] = jgeometry
-
-    # For the i/j groups, extract a python list of which sites have electrostatics
-    # For SPC/TIP3p this will be [1,1,1], while TIP4p (no elec on first site) will be [0,1,1,1]
-    ielec                = GeometryElectrostatics[igeometry]
-    jelec                = GeometryElectrostatics[jgeometry]
-    # Zero out the corresponding lists in case we dont do Elec
-    if(KernelElec=='None'):
-        ielec = []
-        jelec = []
-
-    # Extract similar interaction lists for Vdw interactions (example for SPC: [1,0,0])
-    iVdw                 = GeometryVdw[igeometry]
-    jVdw                 = GeometryVdw[jgeometry]
-
-    # Zero out the corresponding lists in case we dont do Vdw
-    if(KernelVdw=='None'):
-        iVdw = []
-        jVdw = []
-
-    # iany[] and jany[] contains lists of the particles actually used (for interactions) in this kernel
-    iany = list(set(ielec+iVdw))  # convert to+from set to make elements unique
-    jany = list(set(jelec+jVdw))
-
-    defines['PARTICLES_ELEC_I'] = ielec
-    defines['PARTICLES_ELEC_J'] = jelec
-    defines['PARTICLES_VDW_I']  = iVdw
-    defines['PARTICLES_VDW_J']  = jVdw
-    defines['PARTICLES_I']      = iany
-    defines['PARTICLES_J']      = jany
-
-    # elecij,Vdwij are sets with pairs of particles for which the corresponding interaction is done
-    # (and anyij again corresponds to either electrostatics or Vdw)
-    elecij = []
-    Vdwij  = []
-    anyij  = []
-
-    for i in ielec:
-        for j in jelec:
-            elecij.append([i,j])
-
-    for i in iVdw:
-        for j in jVdw:
-            Vdwij.append([i,j])
-
-    for i in iany:
-        for j in jany:
-            if [i,j] in elecij or [i,j] in Vdwij:
-                anyij.append([i,j])
-
-    defines['PAIRS_IJ']     = anyij
-
-    # Make an 2d list-of-distance-properties-to-calculate for i,j
-    ni = max(iany)+1
-    nj = max(jany)+1
-    # Each element properties[i][j] is an empty list
-    properties = [ [ [] for j in range(0,nj) ] for i in range (0,ni) ]
-    # Add properties to each set
-    for i in range(0,ni):
-        for j in range(0,nj):
-            if [i,j] in elecij:
-                properties[i][j] = properties[i][j] + ['electrostatics'] + ElectrostaticsList[KernelElec] + ModifierList[KernelElecMod]
-            if [i,j] in Vdwij:
-                properties[i][j] = properties[i][j] + ['vdw'] + VdwList[KernelVdw] + ModifierList[KernelVdwMod]
-            # Add rinv if we need r
-            if 'r' in properties[i][j]:
-                properties[i][j] = properties[i][j] + ['rinv']
-            # Add rsq if we need rinv or rinsq
-            if 'rinv' in properties[i][j] or 'rinvsq' in properties[i][j]:
-                properties[i][j] = properties[i][j] + ['rsq']
-
-    defines['INTERACTION_FLAGS']    = properties
-
-
-
-def PrintStatistics(ratio):
-    ratio = 100.0*ratio
-    print '\rGenerating %s nonbonded kernels... %5.1f%%' % (Arch,ratio),
-    sys.stdout.flush()
-
-
-
-defines = {}
-kerneldecl = []
-
-cnt     = 0.0
-nelec   = len(ElectrostaticsList)
-nVdw    = len(VdwList)
-nmod    = len(ModifierList)
-ngeom   = len(GeometryNameList)
-
-ntot    = nelec*nmod*nVdw*nmod*ngeom
-
-numKernels = 0
-
-fpdecl = open('nb_kernel_' + Arch + '.cpp','w')
-fpdecl.write( FileHeader )
-fpdecl.write( '#include "gmxpre.h"\n\n' )
-fpdecl.write( '#include "gromacs/gmxlib/nonbonded/nb_kernel.h"\n\n' )
-
-for KernelElec in ElectrostaticsList:
-    defines['KERNEL_ELEC'] = KernelElec
-
-    for KernelElecMod in ModifierList:
-        defines['KERNEL_MOD_ELEC'] = KernelElecMod
-
-        for KernelVdw in VdwList:
-            defines['KERNEL_VDW'] = KernelVdw
-
-            for KernelVdwMod in ModifierList:
-                defines['KERNEL_MOD_VDW'] = KernelVdwMod
-
-                for KernelGeom in GeometryNameList:
-
-                    cnt += 1
-                    KernelFilename = MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom) + '.cpp'
-                    fpkernel = open(KernelFilename,'w')
-                    defines['INCLUDE_HEADER'] = 1  # Include header first time in new file
-                    DoHeader = 1
-
-                    for KernelVF in VFList:
-
-                        KernelName = MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF)
-
-                        defines['KERNEL_NAME'] = KernelName
-                        defines['KERNEL_VF']   = KernelVF
-
-                        # Check if this is a valid/sane/usable combination
-                        if not KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
-                            continue;
-
-                        # The overall kernel settings determine what the _kernel_ calculates, but for the water
-                        # kernels this does not mean that every pairwise interaction has e.g. Vdw interactions.
-                        # This routine sets defines of what to calculate for each pair of particles in those cases.
-                        SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines)
-
-                        if(DoHeader==1):
-                            fpkernel.write( FileHeader )
-
-                        gmxpreprocess('nb_kernel_template_' + Arch + '.pre', KernelName+'.tmp' , defines, force=1,contentType='C')
-                        numKernels = numKernels + 1
-
-                        defines['INCLUDE_HEADER'] = 0   # Header has been included once now
-                        DoHeader=0
-
-                        # Append temp file contents to the common kernelfile
-                        fptmp = open(KernelName+'.tmp','r')
-                        fpkernel.writelines(fptmp.readlines())
-                        fptmp.close()
-                        os.remove(KernelName+'.tmp')
-
-                        # Add an extern declaration for this kernel
-                        fpdecl.write('extern nb_kernel_t ' + KernelName + ';\n');
-
-                        # Add declaration to the buffer
-                        KernelOther=''
-                        kerneldecl.append(MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF))
-
-                    filesize = fpkernel.tell()
-                    fpkernel.close()
-                    if(filesize==0):
-                        os.remove(KernelFilename)
-
-                    PrintStatistics(cnt/ntot)
-                pass
-            pass
-        pass
-    pass
-pass
-
-# Write out the list of settings and corresponding kernels to the declaration file
-fpdecl.write( '\n\n' )
-fpdecl.write( 'nb_kernel_info_t\n' )
-fpdecl.write( '    kernellist_'+Arch+'[] =\n' )
-fpdecl.write( '{\n' )
-for decl in kerneldecl[0:-1]:
-    fpdecl.write( decl + ',\n' )
-fpdecl.write( kerneldecl[-1] + '\n' )
-fpdecl.write( '};\n\n' )
-fpdecl.write( 'int\n' )
-fpdecl.write( '    kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n')
-fpdecl.close()
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 24aca65..0000000
+++ /dev/null
@@ -1,711 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 76 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 76 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*76);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 64 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index bd3dc4e..0000000
+++ /dev/null
@@ -1,1173 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 171 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 171 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*171);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 151 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 151 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*151);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8db123a..0000000
+++ /dev/null
@@ -1,2309 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq01,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq02,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 444 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq01,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq02,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 444 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 400 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 400 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 4c65aec..0000000
+++ /dev/null
@@ -1,1329 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq30,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 200 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq30,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 200 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*200);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 180 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 180 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*180);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index fd56968..0000000
+++ /dev/null
@@ -1,2477 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq13,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq23,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq31,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq32,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq33,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 476 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq13,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq23,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq31,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq32,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq33,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 476 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*476);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 432 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 432 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*432);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8ead884..0000000
+++ /dev/null
@@ -1,635 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 59 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 50 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 50 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*50);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 44a9fac..0000000
+++ /dev/null
@@ -1,1097 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 154 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 154 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 137 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 137 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*137);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 270c226..0000000
+++ /dev/null
@@ -1,2233 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq01,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq02,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 427 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq01,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq02,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 427 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*427);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 386 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 386 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*386);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index d30cf99..0000000
+++ /dev/null
@@ -1,1201 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq30,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 176 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq30,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 176 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*176);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 159 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 159 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*159);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 49fc779..0000000
+++ /dev/null
@@ -1,2349 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq13,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq23,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq31,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq32,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq33,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 452 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq13,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq23,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq31,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq32,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq33,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 452 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*452);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 411 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 411 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*411);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index c84d9da..0000000
+++ /dev/null
@@ -1,562 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable (table data and scale come from kernel_data->table_elec)
- * VdW interaction:            None
- * Geometry:                   Particle-Particle (one i particle per neighbor list entry)
- * Calculate force/pot:        PotentialAndForce (energy is accumulated into kernel_data->energygrp_elec)
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist, /* neighbor list; nri, iinr, jindex, jjnr, shift and gid are read */
-                     rvec                        * gmx_restrict          xx, /* coordinates, read through the flat pointer x */
-                     rvec                        * gmx_restrict          ff, /* forces, passed to the force update helpers through f */
-                     struct t_forcerec           * gmx_restrict          fr, /* supplies shift_vec, fshift and ic->epsfac */
-                     t_mdatoms                   * gmx_restrict     mdatoms, /* supplies chargeA */
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, /* supplies table_elec and energygrp_elec (used below despite gmx_unused) */
-                     t_nrnb                      * gmx_restrict        nrnb) /* flop accounting, updated once through inc_nrnb */
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters (only suffix 0 occurs in this particle-particle kernel).
-     * Suffixes A,B refer to the j loop unrolling done with double precision SIMD, i.e. the two different
-     * jnr indices whose data are put in the two positions of the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; /* only vfconv is referenced in this kernel */
-
-    x                = xx[0]; /* flat real view of the rvec array; indexed with DIM*atom below */
-    f                = ff[0]; /* same flat view for the force array */
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Give the j indices/offsets defined values up front so compilers do not warn about uninitialised use */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists (entry iidx holds one i particle and its j range) */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors: j entries jindex[iidx] .. jindex[iidx+1]-1 */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles: the i charge is pre-multiplied by facel (fr->ic->epsfac) */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop: j neighbors are processed in pairs (A,B); an odd leftover is handled after the loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-            /* Each table point occupies four consecutive entries (read below as two pairs, at +0 and +2), hence index*4 */
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS: Fp=F+eps*(G+eps*H), VV=Y+eps*Fp, FF=Fp+eps*(G+2*eps*H), reading madd(a,b,c) as a*b+c */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force: the i force accumulates d*fscal; the helper below applies the matching update to both j atoms */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 46 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-            /* Remainder: exactly one j neighbor is left when the list length is odd, so only the A slot is loaded */
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles (single charge; the other operand of the load is a zero vector) */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-            /* Same index*4 scaling as in the paired loop; only vfconv.i[0] is used for the loads below */
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS (same formulas as above; zero vectors stand in for the missing second table point) */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom; the unpacklo with zero presumably clears the unused second slot first (confirm) */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force (single j atom) */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 46 flops */
-        }
-
-        /* End of innermost loop: hand the accumulated i force to the helper together with the f and fshift destinations */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies for the energy group index ggid of this list entry */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 8 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops: 8 per outer iteration plus 46 per j interaction */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*46);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 42 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 42 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*42);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 08edfc3..0000000
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- *
- * Summary: for each i entry of the neighbor list, three i sites (suffixes 0,1,2)
- * interact with every listed j particle through tabulated electrostatics only.
- * Force contributions are accumulated for the i sites and for the j particles, and
- * the summed electrostatic energy of each i entry is handed to
- * gmx_fjsp_update_1pot_v2r8 for the slot energygrp_elec[gid[iidx]].
- *
- * Inputs as used by the body below:
- *   nlist       - nri, iinr, jindex, jjnr, shift and gid are read.
- *   xx          - coordinates; accessed as a flat real array via xx[0] at DIM*index.
- *   ff          - forces; accessed as a flat real array via ff[0] and handed to the
- *                 gmx_fjsp_decrement_* / gmx_fjsp_update_iforce_* helpers
- *                 (presumably updated in place; the helpers are defined elsewhere).
- *   fr          - shift_vec, fshift and ic->epsfac are read.
- *   mdatoms     - chargeA is read.
- *   kernel_data - table_elec->data, table_elec->scale and energygrp_elec are used.
- *                 NOTE(review): the parameter is tagged gmx_unused although it is
- *                 dereferenced here.
- *   nrnb        - passed to inc_nrnb() together with the flop estimate.
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     *
-     * NOTE: several locals declared below are never referenced in this kernel body
-     * (rcutoff_scalar, tx/ty/tz, rcutoff, rcutoff2, jidxall, vdwioffset0-2, isai0-2,
-     * vdwjidx0A/B, isaj0, rinvsq*, c6_* and c12_*, crf/krf/krf2, Heps, dummy_mask,
-     * cutoff_mask, one, two, gbconv, ewconv).
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; /* vfconv: SIMD value stored via .simd, table indices read back via .i[0]/.i[1] */
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters.
-     * The three site charges are taken from the first i entry (iinr[0]) only and
-     * reused for every i entry, i.e. all i molecules are presumed to share charges.
-     * NOTE(review): iinr[0] is read even when nri == 0 - confirm callers never pass
-     * an empty list.
-     */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop.
-         * Two j entries (A and B) are handled per iteration; an odd leftover entry
-         * is processed by the separate block after this loop.
-         */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4; /* apparently 4 table values per point: loads below read at +0 and +2 */
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS
-             * Table data is loaded at each of the two indices and again at index+2;
-             * the transposes presumably regroup it so that Y, F, G and H each hold the
-             * value for both j entries.
-             * Assuming _fjsp_madd_v2r8(a,b,c) computes a*b+c:
-             *   Fp = F + vfeps*(G + vfeps*H)
-             *   VV = Y + vfeps*Fp
-             *   FF = Fp + vfeps*(G + 2*vfeps*H)
-             * The same sequence is repeated below for the 1-0 and 2-0 site pairs.
-             */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 141 flops */
-        }
-
-        if(jidx<j_index_end) /* true only when one j entry is left over after the paired loop */
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles.
-             * Only one charge is loaded, combined with a zero vector (presumably leaving
-             * the second SIMD position at zero).
-             */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS
-             * Only index 0 is looked up here; zero vectors stand in for the loads
-             * that the paired loop performs at index 1.
-             */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom.
-             * The unpacklo with a zero vector presumably clears the unused second SIMD
-             * position so that it does not contribute to the sum; the same is done for
-             * fscal below.
-             */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 141 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies in the slot selected by gid[iidx] */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops: 19 per i entry plus 141 per j entry, matching the annotations above */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*141);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*129);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index e07efb0..0000000
+++ /dev/null
@@ -1,2168 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq01,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq02,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 414 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq00,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq01,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq02,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 414 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*414);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r01,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r02,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*378);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 5c79ee5..0000000
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters (in this kernel i sites 1-3 interact with a single j site 0).
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-    /* Flat real pointers to the first element of the coordinate and force arrays */
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    /* Tabulated electrostatics: table data pointer and table scale */
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-    /* NOTE(review): iinr[0] is read below even when nri is 0 - confirm callers never pass an empty list */
-    /* Setup water-specific parameters: charges of i sites 1-3, scaled by facel, read once from the first list entry */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Initialize to avoid spurious uninitialized-variable compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector (offset by DIM: site 0 of the i water is skipped) */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop: two j particles (A and B) are processed per iteration */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-            /* Each table point occupies 4 consecutive reals (loaded below as Y,F and G,H pairs), so scale the index by 4 */
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq30,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 141 flops */
-        }
-        /* Remainder: if the j list has odd length, one j particle is left and is handled alone (only index A is used) */
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq10,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-            /* NOTE(review): presumably keeps the lane of the real j particle and zeroes the other - confirm _fjsp_unpacklo_v2r8 semantics */
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq20,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq30,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 141 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies for the energy group index ggid of this list entry */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops (19 per outer iteration, 141 per j particle) */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*141);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r10,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r20,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r30,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*129);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 9d75297..0000000
+++ /dev/null
@@ -1,2168 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq13,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq23,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq31,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq32,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq33,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 414 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq11,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq12,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq13,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq21,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq22,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq23,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq31,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq32,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq33,VV);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 414 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*414);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r11,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r12,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r13,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r21,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r22,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r23,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r31,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r32,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r33,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*378);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 37e0a2a..0000000
+++ /dev/null
@@ -1,679 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 66 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 66 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*66);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 57 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 57 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*57);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 002de2e..0000000
+++ /dev/null
@@ -1,989 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- *
- * For every i entry of the neighbor list (three i atoms, suffixes 0,1,2) this
- * kernel walks the listed j atoms two at a time, evaluates plain Coulomb for
- * all three i atoms and the tabulated VdW term for i atom 0 only, and
- * accumulates i forces, j forces and the Coulomb/VdW potential sums.
- *
- * nlist       - read: nri, iinr, jindex, jjnr, shift, gid
- * xx          - coordinates; accessed as a flat real array through xx[0]
- * ff          - forces; accessed as a flat real array through ff[0] and handed
- *               to the force decrement/update helpers
- * fr          - read: shift_vec, ic->epsfac, ntype, nbfp; fshift is handed to
- *               the i-force update helper
- * mdatoms     - read: chargeA, typeA
- * kernel_data - read: table_vdw (data, scale); energygrp_elec/energygrp_vdw are
- *               handed to the potential update helper.
- *               NOTE(review): declared gmx_unused although it is used below.
- * nrnb        - flop accounting, updated through inc_nrnb()
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     * NOTE(review): only two lanes (A,B) are used in this kernel, so "four positions" looks
-     * like wording carried over from another kernel flavor - confirm.
-     * Several locals declared below (e.g. rcutoff_scalar, vdwioffset1, vdwioffset2, gbconv,
-     * ewconv) are never referenced in this function.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters.
-     * Charges (pre-multiplied by facel) and the VdW type offset are read once, from the
-     * first i entry nlist->iinr[0], and are not recomputed inside the outer loop.
-     * NOTE(review): presumably every i water in the list shares these parameters, and
-     * iinr[0] is read even if nri is 0 - confirm both against the callers.
-     * Only i atom 0 gets a VdW offset; atoms 1 and 2 take part in Coulomb only.
-     */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings (vdwjidx0A/vdwjidx0B are not initialized here) */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop: two j atoms (lanes A and B) per iteration;
-         * a single leftover j atom is handled by the if-block after the loop.
-         */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer.
-             * vfeps is the remainder rt minus the converted-back index; the index is then
-             * scaled by 8 reals per table point: the loads below read offsets +0/+2 for
-             * dispersion and +4/+6 for repulsion.
-             */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 131 flops */
-        }
-
-        if(jidx<j_index_end) /* one unpaired j atom remains after the two-at-a-time loop */
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms.
-             * NOTE(review): vdwjidx0B is not assigned in this branch; it holds whatever an
-             * earlier paired iteration left there, or is uninitialized if none has run yet.
-             * The second-lane results appear to be discarded further down (unpacklo with a
-             * zero vector), but the address formed from vdwjidx0B is still passed to the
-             * load helper - confirm this cannot read out of bounds.
-             */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer.
-             * Only vfconv.i[0] is used for table loads in this branch.
-             */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom.
-             * The unpacklo with a zero vector presumably keeps only the first-lane value so
-             * the unused second lane does not contribute - confirm against the intrinsic's
-             * documentation. The same masking is applied to fscal before each force update.
-             */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 131 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies (slot selected by this list entry's gid value) */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations (counts j entries, not paired iterations) */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops, using the per-iteration counts quoted above
-     * (20 per outer iteration, 131 per j entry).
-     */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*131);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*120);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index c811894..0000000
+++ /dev/null
@@ -1,1669 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 314 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 314 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*314);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*297);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 0843afa..0000000
+++ /dev/null
@@ -1,1097 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 155 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 155 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*155);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 144 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 144 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*144);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8ec8beb..0000000
+++ /dev/null
@@ -1,1789 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 341 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 341 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*341);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 324 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 324 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*324);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 4a3ee4d..0000000
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 43 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 43 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*43);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 37 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 37 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*37);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index ca33661..0000000
+++ /dev/null
@@ -1,855 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 108 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 108 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*108);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 100 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 100 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*100);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 7d6c1c5..0000000
+++ /dev/null
@@ -1,1535 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 291 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 291 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*291);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 277 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 277 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*277);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index a722d13..0000000
+++ /dev/null
@@ -1,963 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 131 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 131 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*131);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 123 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 123 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*123);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index a4c5775..0000000
+++ /dev/null
@@ -1,1655 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 317 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 317 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*317);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 303 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 303 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*303);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 343b627..0000000
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 31 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 31 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 8 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*31);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 30 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 30 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*30);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8d0439f..0000000
+++ /dev/null
@@ -1,790 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 96 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 96 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*96);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*93);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8d18a40..0000000
+++ /dev/null
@@ -1,1478 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 279 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 279 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*279);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,rinv00);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq00);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,rinv01);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq01);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,rinv02);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq02);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*270);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index d27f0b9..0000000
+++ /dev/null
@@ -1,790 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 96 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 96 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*96);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,rinv10);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq10);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,rinv20);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq20);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,rinv30);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq30);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*93);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 43c0384..0000000
+++ /dev/null
@@ -1,1478 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 279 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 279 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*279);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,rinv11);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq11);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,rinv12);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq12);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,rinv13);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq13);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,rinv21);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq21);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,rinv22);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq22);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,rinv23);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq23);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,rinv31);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq31);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,rinv32);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq32);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,rinv33);
-            felec            = _fjsp_mul_v2r8(velec,rinvsq33);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*270);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8bcfe4f..0000000
+++ /dev/null
@@ -1,732 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 79 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 79 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*79);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 64 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 64 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 4638b19..0000000
+++ /dev/null
@@ -1,1232 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 180 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 180 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*180);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- *
- * Force-only kernel: no potential energies are accumulated here.
- *
- * For every entry of the neighbor list, the three atoms of the i water
- * interact with each listed j particle. Atom 0 of the water gets tabulated
- * Ewald electrostatics plus analytical LJ-PME, atoms 1 and 2 get tabulated
- * Ewald electrostatics only. Every pair force is masked with rsq < rcutoff^2.
- *
- * Parameters:
- *   nlist        neighbor list; nri, iinr, jindex, jjnr, shift and gid are read.
- *   xx           coordinates, addressed as a flat array with DIM reals per atom.
- *   ff           forces, same layout as xx; updated in place.
- *   fr           force record; shift_vec, fshift, ntype, nbfp, ljpme_c6grid and
- *                the interaction constants in fr->ic are read, fshift is updated.
- *   mdatoms      per-atom data; chargeA and typeA are read.
- *   kernel_data  not used by this force-only kernel.
- *   nrnb         flop accounting, updated through inc_nrnb().
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    /* NOTE(review): a number of the locals below are never read in this force-only
-     * variant; presumably they are boilerplate shared by all generated kernel flavours.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    /* Used to read the two integer table indices back out of a SIMD register */
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    /* Treat coordinates and forces as flat arrays of reals */
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    /* ewclj2 holds -(ewclj^2), so ewclj2*rsq is the exponent argument used below */
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    /* Charges and the VdW type row are taken from the first i water only and
-     * reused for every list entry; this assumes all i waters in the list share
-     * the same parameters - TODO confirm against the list construction.
-     */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        /* Clear the i force accumulators for the three water atoms */
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        /* Two j particles (A and B) are processed per iteration; an odd
-         * leftover particle is handled by the epilogue after this loop.
-         */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Water atom 0 - j particle: Ewald electrostatics + LJ-PME */
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            /* Fetch the two neighbouring table entries per lane and interpolate
-             * linearly: felec = (1-eweps)*ewtabF + eweps*ewtabFn
-             */
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Zero the force in lanes that are beyond the cutoff */
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Water atom 1 - j particle: Ewald electrostatics only */
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Water atom 2 - j particle: Ewald electrostatics only */
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /* Write the accumulated j forces back for both j particles */
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 151 flops */
-        }
-
-        /* Epilogue: a single leftover j particle (odd list length). Only the
-         * lower SIMD lane carries real data here; the upper lane of every
-         * force is cleared with _fjsp_unpacklo_v2r8(fscal,0) before use.
-         */
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Water atom 0 - j particle: Ewald electrostatics + LJ-PME */
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            /* Only particle A exists here. vdwjidx0B is not assigned in this
-             * epilogue (it is stale, or uninitialized when the list holds a
-             * single j particle), so use the single-pointer loads and never
-             * index the parameter tables with it.
-             */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
-
-            c6grid_00       = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),vdwgridparam+vdwioffset0+vdwjidx0A);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Keep the lower lane only; the upper lane has no j particle */
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Water atom 1 - j particle: Ewald electrostatics only */
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Water atom 2 - j particle: Ewald electrostatics only */
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /* Write the accumulated j force back for the single j particle */
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 151 flops */
-        }
-
-        /* End of innermost loop */
-
-        /* Fold the i force accumulators into the force and shift-force arrays */
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*151);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index fce8997..0000000
+++ /dev/null
@@ -1,2472 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_01;
-    _fjsp_v2r8           c6grid_02;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 471 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 471 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*471);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_01;
-    _fjsp_v2r8           c6grid_02;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 400 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 400 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 9065e6a..0000000
+++ /dev/null
@@ -1,1390 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_30;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 209 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 209 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*209);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_30;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 180 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 180 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*180);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index a208931..0000000
+++ /dev/null
@@ -1,2642 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_13;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    _fjsp_v2r8           c6grid_23;
-    _fjsp_v2r8           c6grid_31;
-    _fjsp_v2r8           c6grid_32;
-    _fjsp_v2r8           c6grid_33;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 503 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 503 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*503);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_13;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    _fjsp_v2r8           c6grid_23;
-    _fjsp_v2r8           c6grid_31;
-    _fjsp_v2r8           c6grid_32;
-    _fjsp_v2r8           c6grid_33;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 432 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 432 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*432);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index dee46fc..0000000
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 67 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 67 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*67);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 49 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 49 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*49);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 4441236..0000000
+++ /dev/null
@@ -1,1168 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 168 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 168 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*168);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 136 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 136 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*136);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 0d17373..0000000
+++ /dev/null
@@ -1,2406 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 459 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 459 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*459);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 385 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 385 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*385);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 2fe5af3..0000000
+++ /dev/null
@@ -1,1312 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 194 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 194 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 162 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 162 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*162);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index dfea0d1..0000000
+++ /dev/null
@@ -1,2562 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 488 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 488 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*488);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 414 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 414 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*414);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index ebead00..0000000
+++ /dev/null
@@ -1,597 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 49 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 49 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 8 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*49);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- *
- * Summary of what the body below does:
- *  - Walks the nlist->nri outer (i) entries; for each one the j neighbours in
- *    jjnr[jindex[iidx] .. jindex[iidx+1]) are handled two at a time (A and B),
- *    followed by a single-j epilogue when the count is odd.
- *  - The Ewald force term is built from two values read out of
- *    fr->ic->tabq_coul_F and masked with rsq < rcoulomb*rcoulomb.
- *  - Resulting forces are handed to gmx_fjsp_* helper routines together with
- *    pointers into ff and fr->fshift.
- *  - No energies are accumulated here: kernel_data is never referenced in the
- *    body (it is tagged gmx_unused in the signature).
- *  - Flop counts are reported through inc_nrnb() as eNR_NBKERNEL_ELEC_F.
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register
-     * (_fjsp_v2r8 is paired with a long long int i[2] in the union declared below).
-     *
-     * NOTE(review): many of the locals declared below are never read in this
-     * force-only particle-particle variant, e.g. tx/ty/tz, jidxall, vdwioffset0,
-     * isai0, vdwjidx0A/B, fjx0/fjy0/fjz0, isaj0, c6_00/c12_00, velec, velecsum,
-     * crf/krf/krf2, ewtabD, ewtabV, dummy_mask, one, two, vfconv, gbconv, ggid.
-     * Presumably the kernel generator emits one common declaration block for all
-     * kernel flavours - confirm against the generator script.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice.
-     * Only the squared cutoff (rcutoff2) is compared against rsq further down.
-     * NOTE(review): gid, sh_ewald and ewtabhalfspace are assigned above but not read again in this function. */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Pre-set the j indices and offsets so the compiler does not warn about
-     * possibly uninitialized variables */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles: the i charge is pre-multiplied by facel (fr->ic->epsfac) */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Start inner kernel loop: two j entries per pass; the bound j_index_end-1
-         * leaves an odd last entry for the epilogue after the loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer.
-             * eweps is ewrt minus the index converted back to double. The indices are
-             * stored through the ewconv union so they can be read back as the scalars
-             * ewconv.i[0] and ewconv.i[1], which offset ewtab directly. Two table values
-             * (ewtabF, ewtabFn) are then fetched per j and combined with eweps;
-             * presumably this is the linear interpolation (1-eps)*F + eps*Fn -
-             * TODO confirm against the _fjsp_nmsub_v2r8 definition. */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force: fscal has just been ANDed with cutoff_mask
-             * (rsq00 < rcutoff2); it scales the displacement for the i accumulators
-             * and is passed to the j-force helper below */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 42 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates (epilogue: only the single remaining j entry, A) */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles: one charge is loaded on top of a zero
-             * vector, presumably leaving the second position at zero - TODO confirm
-             * _fjsp_loadl_v2r8 semantics */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer.
-             * Same scheme as in the unrolled loop, but only ewconv.i[0] is used to
-             * offset ewtab here. */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force: fscal was combined with a zero vector just above,
-             * presumably so the position without a j atom contributes nothing -
-             * TODO confirm _fjsp_unpacklo_v2r8 semantics */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 42 flops */
-        }
-
-        /* End of innermost loop: hand the accumulated i force to the helper together
-         * with the i slots of f and fshift */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations (counted per j entry, not per unrolled pass) */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops: 7 per outer iteration plus 42 per inner iteration,
-     * matching the per-loop flop comments above */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*42);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8cb6e74..0000000
+++ /dev/null
@@ -1,1093 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 150 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 150 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*150);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*129);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 032690b..0000000
+++ /dev/null
@@ -1,2339 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 441 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 441 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*441);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*378);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index aec2fb6..0000000
+++ /dev/null
@@ -1,1093 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 150 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 150 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*150);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*129);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index b2f8bbd..0000000
+++ /dev/null
@@ -1,2339 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 441 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 441 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*441);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 378 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*378);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 319defb..0000000
+++ /dev/null
@@ -1,759 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 86 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 86 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*86);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 80 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 80 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*80);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 01c2e66..0000000
+++ /dev/null
@@ -1,1365 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 225 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 225 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*225);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 213 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 213 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*213);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 06cf14b..0000000
+++ /dev/null
@@ -1,2933 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 630 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 630 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*630);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 600 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 600 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*600);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 2b6cb9f..0000000
+++ /dev/null
@@ -1,1557 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 269 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 269 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*269);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 257 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 257 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*257);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 3eb4c7d..0000000
+++ /dev/null
@@ -1,3137 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 677 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 677 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*677);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 647 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 647 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*647);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index f8c11ef..0000000
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 68 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 68 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 8 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*68);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 65 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 65 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*65);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 769d682..0000000
+++ /dev/null
@@ -1,1286 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 207 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 207 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*207);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 198 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 198 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*198);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 2f85866..0000000
+++ /dev/null
@@ -1,2862 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 612 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 612 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*612);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 585 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            d                = _fjsp_sub_v2r8(r01,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            d                = _fjsp_sub_v2r8(r02,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 585 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*585);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index b7cfc2d..0000000
+++ /dev/null
@@ -1,1286 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 207 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 207 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*207);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 198 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            d                = _fjsp_sub_v2r8(r10,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            d                = _fjsp_sub_v2r8(r20,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            d                = _fjsp_sub_v2r8(r30,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 198 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*198);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 165cd5f..0000000
+++ /dev/null
@@ -1,2862 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 612 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 612 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*612);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald (with potential switch)
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 585 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            d                = _fjsp_sub_v2r8(r11,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            d                = _fjsp_sub_v2r8(r12,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            d                = _fjsp_sub_v2r8(r13,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            d                = _fjsp_sub_v2r8(r21,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            d                = _fjsp_sub_v2r8(r22,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            d                = _fjsp_sub_v2r8(r23,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            d                = _fjsp_sub_v2r8(r31,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            d                = _fjsp_sub_v2r8(r32,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            d                = _fjsp_sub_v2r8(r33,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 585 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*585);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index eaf4a2d..0000000
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 78 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 78 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*78);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 65 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 65 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*65);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 14400d6..0000000
+++ /dev/null
@@ -1,1160 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 169 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 169 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*169);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 146 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 146 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*146);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index d222056..0000000
+++ /dev/null
@@ -1,2170 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 430 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 430 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*430);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 377 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 377 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*377);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 34c998a..0000000
+++ /dev/null
@@ -1,1276 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 194 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 194 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 171 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 171 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*171);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 733cc67..0000000
+++ /dev/null
@@ -1,2298 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 458 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 458 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*458);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 405 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 405 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*405);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 107d735..0000000
+++ /dev/null
@@ -1,674 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 68 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 68 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*68);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 61 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 61 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*61);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index eb7b992..0000000
+++ /dev/null
@@ -1,1098 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 159 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 159 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*159);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 142 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 142 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*142);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 30f4d0e..0000000
+++ /dev/null
@@ -1,2110 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_01;
-    _fjsp_v2r8           c6grid_02;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 420 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 420 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*420);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_01;
-    _fjsp_v2r8           c6grid_02;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 373 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 373 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*373);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index c258926..0000000
+++ /dev/null
@@ -1,1220 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_30;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 185 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 185 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*185);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_10;
-    _fjsp_v2r8           c6grid_20;
-    _fjsp_v2r8           c6grid_30;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 168 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 168 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*168);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index e716342..0000000
+++ /dev/null
@@ -1,2244 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_13;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    _fjsp_v2r8           c6grid_23;
-    _fjsp_v2r8           c6grid_31;
-    _fjsp_v2r8           c6grid_32;
-    _fjsp_v2r8           c6grid_33;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 449 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 449 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*449);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LJEwald
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    _fjsp_v2r8           c6grid_11;
-    _fjsp_v2r8           c6grid_12;
-    _fjsp_v2r8           c6grid_13;
-    _fjsp_v2r8           c6grid_21;
-    _fjsp_v2r8           c6grid_22;
-    _fjsp_v2r8           c6grid_23;
-    _fjsp_v2r8           c6grid_31;
-    _fjsp_v2r8           c6grid_32;
-    _fjsp_v2r8           c6grid_33;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    c6grid_00        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 402 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 402 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*402);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index fbb693c..0000000
+++ /dev/null
@@ -1,614 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 56 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*56);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 46 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 46 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*46);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index df8f97d..0000000
+++ /dev/null
@@ -1,1034 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 147 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 147 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*147);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 127 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 127 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*127);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 2821f97..0000000
+++ /dev/null
@@ -1,2044 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 408 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 408 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*408);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 358 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 358 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*358);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 4f4d347..0000000
+++ /dev/null
@@ -1,1142 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 170 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 170 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*170);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 150 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 150 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*150);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index e205570..0000000
+++ /dev/null
@@ -1,2164 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 434 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 434 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*434);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 384 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 384 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*384);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index e952103..0000000
+++ /dev/null
@@ -1,549 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- *
- * Overview: for each of the nlist->nri outer (i) entries the kernel walks the
- * j neighbours jjnr[jindex[iidx] .. jindex[iidx+1]-1] two at a time, followed
- * by a single-j remainder step when one neighbour is left over. For every pair
- * it evaluates the tabulated Ewald term and updates the i force accumulators,
- * the j forces and the electrostatic energy sum.
- *
- * Data used by this kernel:
- *   nlist       - nri, iinr, jindex, jjnr, shift and gid
- *   xx          - coordinates, read through x = xx[0]
- *   ff          - forces; f = ff[0] is passed to the force decrement/update helpers
- *   fr          - shift_vec, fshift, ic->epsfac, ic->sh_ewald, ic->tabq_coul_FDV0
- *                 and ic->tabq_scale
- *   mdatoms     - chargeA
- *   kernel_data - energygrp_elec+gid[iidx] is handed to gmx_fjsp_update_1pot_v2r8
- *                 together with the per-i energy sum (the parameter is tagged
- *                 gmx_unused although it is read here)
- *   nrnb        - passed to inc_nrnb() with eNR_NBKERNEL_ELEC_VF and the flop
- *                 estimate outeriter*8 + inneriter*44
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
-     * jnr indices corresponding to data put in the two positions of the SIMD register.
-     *
-     * This is generated code: a number of the locals declared below (for example the
-     * cutoff, VdW and reaction-field related ones, one/two, vfconv and gbconv) are not
-     * referenced anywhere in this particular kernel.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Pre-initialise the j indices and offsets; this is done only to silence compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        /* Force accumulators for the i particle start from zero for every list entry */
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles (the i charge is pre-multiplied by facel = epsfac) */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop: two j neighbours (A and B) per iteration */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS
-             *
-             * The table is addressed with a stride of 4 reals per index (ewtab + 4*index):
-             * the pair stored at offset 0 is loaded into ewtabF/ewtabD and the value at
-             * offset +2 into ewtabV. GMX_FJSP_TRANSPOSE2_V2R8 then regroups the two loads,
-             * presumably so that each register holds one quantity for both j atoms.
-             * NOTE(review): the remarks in this section assume the usual meaning of the
-             * _fjsp_* intrinsics (madd = a*b+c, nmsub = c-a*b); confirm against the
-             * Fujitsu HPC-ACE intrinsics reference.
-             */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer.
-             * eweps is what remains of ewrt after subtracting the converted-back index, and
-             * the ewconv union is used to read the two indices as integers (ewconv.i[0], ewconv.i[1]).
-             */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force: accumulate on the i particle here; the same fscal and
-             * displacement are passed to the j-force helper below.
-             */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 44 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* Remainder step: exactly one j neighbour is left after the paired loop, so only
-             * the A slot is used from here on.
-             * load j atom coordinates
-             */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS
-             *
-             * Same table evaluation as in the paired loop, except that only ewconv.i[0]
-             * is used for the table loads and zero vectors stand in for the B-side loads.
-             */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom.
-             * NOTE(review): the unpacklo with a zero vector presumably clears the unused
-             * second position so that it does not contribute to the sum - confirm.
-             */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 44 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies (slot selected by the gid entry of this list) */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 8 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops: 8 per outer iteration plus 44 per inner iteration */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*44);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 39 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 39 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*39);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8e52d58..0000000
+++ /dev/null
@@ -1,969 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 135 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 135 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*135);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- *
- * For every i entry of the neighbor list, three i sites (suffixes 0,1,2) interact
- * with each single-site j neighbor. Only force terms are computed here; no
- * potential is accumulated. The accumulated forces are handed to the
- * decrement/update helpers together with pointers into ff and fr->fshift, and
- * the flop count is registered in nrnb. The neighbor list, xx and
- * mdatoms->chargeA are only read; kernel_data is not referenced.
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     *
-     * Several of the locals declared below (for example tx/ty/tz, the rcutoff*
-     * variables, c6_* and c12_*, velec/velecsum, ewtabD/ewtabV, one, two, vfconv and gbconv)
-     * are never referenced in this kernel. sh_ewald, ewtabhalfspace and gid are
-     * assigned during setup but not read afterwards.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; /* ewconv is used to read the two table indices back as integers */
-
-    x                = xx[0]; /* flat real* view of the coordinate rvec array */
-    f                = ff[0]; /* flat real* view of the force rvec array */
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F; /* force-only table; indexed below without any stride multiplier */
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters.
-     * The charges of the three i sites are read once, from the first i entry of
-     * the list (iinr[0]), and pre-multiplied by facel (fr->ic->epsfac). They are
-     * reused for every i entry in the outer loop, so presumably all i entries of
-     * this list are waters with identical charges - confirm against the caller.
-     * NOTE(review): iinr[0] is read even when nri is 0; presumably the list is
-     * never empty or iinr is always allocated - confirm.
-     */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop.
-         * Two j entries (A and B) are processed per iteration; the loop stops
-         * one short of j_index_end so that an odd leftover entry is handled by
-         * the separate single-entry block after the loop.
-         */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             * (i site 0 with j)      *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS
-             * The tabulated value is looked up at r*tabq_scale: itab_tmp holds the
-             * truncated index and eweps the fractional remainder. ewtabF/ewtabFn are
-             * loaded from the table at that index (presumably the entry and its
-             * successor - confirm against gmx_fjsp_load_2pair_swizzle_v2r8) and
-             * combined as eweps*ewtabFn + (ewtabF - eweps*ewtabF), i.e. presumably a
-             * linear interpolation, assuming _fjsp_nmsub_v2r8(a,b,c) is c-a*b - confirm.
-             * The scalar force is then qq*rinv*(rinvsq - interpolated value).
-             */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force.
-             * The same d*fscal term is accumulated for the i site and for the j
-             * atom; the j sum is later passed to a "decrement" helper, which by its
-             * name presumably subtracts it from f - confirm.
-             */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             * (i site 1 with j)      *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS (same table lookup as for site 0) */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             * (i site 2 with j)      *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS (same table lookup as for site 0) */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        if(jidx<j_index_end) /* odd number of j entries: exactly one entry is left over */
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates (single j entry, only the A pointer is used) */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles.
-             * Presumably the single charge goes into the low position of a zeroed
-             * register - confirm against the _fjsp_loadl_v2r8 documentation.
-             */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             * (i site 0 with j)      *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* EWALD ELECTROSTATICS
-             * Same computation as in the unrolled loop, but only the table entry for
-             * ewconv.i[0] is loaded.
-             */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8()); /* presumably keeps the low position and zeroes the unused one so it adds no force - confirm */
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             * (i site 1 with j)      *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS (same table lookup as for site 0) */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8()); /* same masking of the unused position as for site 0 */
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             * (i site 2 with j)      *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS (same table lookup as for site 0) */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8()); /* same masking of the unused position as for site 0 */
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations
-         * (counted per j entry, not per unrolled pair).
-         */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops:
-     * 18 per i entry plus 120 per j entry, booked under eNR_NBKERNEL_ELEC_W3_F.
-     */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*120);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 9e7b9d8..0000000
+++ /dev/null
@@ -1,1987 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 396 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 396 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*396);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r01              = _fjsp_mul_v2r8(rsq01,rinv01);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r02              = _fjsp_mul_v2r8(rsq02,rinv02);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*351);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index fe6d4f1..0000000
+++ /dev/null
@@ -1,969 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 135 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 135 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*135);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r10              = _fjsp_mul_v2r8(rsq10,rinv10);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r20              = _fjsp_mul_v2r8(rsq20,rinv20);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r30              = _fjsp_mul_v2r8(rsq30,rinv30);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r30,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*120);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 0ab3b8e..0000000
+++ /dev/null
@@ -1,1987 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 396 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            ewtabD           = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            ewtabFn          = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 396 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*396);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r11              = _fjsp_mul_v2r8(rsq11,rinv11);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r12              = _fjsp_mul_v2r8(rsq12,rinv12);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r13              = _fjsp_mul_v2r8(rsq13,rinv13);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r21              = _fjsp_mul_v2r8(rsq21,rinv21);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r22              = _fjsp_mul_v2r8(rsq22,rinv22);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r23              = _fjsp_mul_v2r8(rsq23,rinv23);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r31              = _fjsp_mul_v2r8(rsq31,rinv31);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r32              = _fjsp_mul_v2r8(rsq32,rinv32);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r33              = _fjsp_mul_v2r8(rsq33,rinv33);
-
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*351);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index f5e35ed..0000000
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 59 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 51 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 51 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 6 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*51);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 9f6d78b..0000000
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    rcutoff_scalar   = fr->ic->rvdw;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 59 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    rcutoff_scalar   = fr->ic->rvdw;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 51 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 51 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 6 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*51);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index fe95fde..0000000
+++ /dev/null
@@ -1,574 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 50 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 50 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*50);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LJEwald
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8           c6grid_00;
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 48 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
-                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 48 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 6 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*48);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index c084dc2..0000000
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    rcutoff_scalar   = fr->ic->rvdw;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 44 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 44 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*44);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    rcutoff_scalar   = fr->ic->rvdw;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 33 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 33 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 6 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*33);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 69ea5a8..0000000
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    rcutoff_scalar   = fr->ic->rvdw;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 62 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 62 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*62);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    rcutoff_scalar   = fr->ic->rvdw;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 59 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 59 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 6 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*59);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index da647a0..0000000
+++ /dev/null
@@ -1,498 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 35 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 35 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*35);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 30 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-
-            /* Load parameters for j particles */
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 30 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 6 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*30);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index a0de021..0000000
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 75 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 75 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*75);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 60 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 60 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*60);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 478d593..0000000
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 156 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 156 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*156);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 129 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*129);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index cd9a3ec..0000000
+++ /dev/null
@@ -1,2011 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 387 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 387 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*387);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 324 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 324 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*324);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 0ad4886..0000000
+++ /dev/null
@@ -1,1221 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 179 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 179 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*179);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 153 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 153 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*153);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 799f752..0000000
+++ /dev/null
@@ -1,2129 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 413 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 413 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*413);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*351);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 7e302d8..0000000
+++ /dev/null
@@ -1,607 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 57 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 57 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*57);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 40 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 40 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*40);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 1fae9e0..0000000
+++ /dev/null
@@ -1,989 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 138 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 138 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*138);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 109 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 109 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*109);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 73e4c28..0000000
+++ /dev/null
@@ -1,1885 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 369 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 369 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*369);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 304 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 304 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*304);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index c57c50d..0000000
+++ /dev/null
@@ -1,1133 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 164 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 164 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*164);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 135 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 135 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*135);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index c33e0a6..0000000
+++ /dev/null
@@ -1,2041 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 398 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 398 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*398);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 333 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 333 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*333);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 0bc1aa7..0000000
+++ /dev/null
@@ -1,683 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 73 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 73 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*73);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 64 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 64 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 3198b6d..0000000
+++ /dev/null
@@ -1,1065 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 154 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 154 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 133 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 133 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*133);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8a9e47c..0000000
+++ /dev/null
@@ -1,1961 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 385 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 385 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*385);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 328 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 328 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*328);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index c411871..0000000
+++ /dev/null
@@ -1,1213 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 182 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 182 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*182);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 161 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 161 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*161);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 5428c35..0000000
+++ /dev/null
@@ -1,2121 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 416 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 416 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*416);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 359 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            d                = _fjsp_sub_v2r8(r00,rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
-            /* Evaluate switch function */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 359 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*359);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index f59b470..0000000
+++ /dev/null
@@ -1,532 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 39 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 39 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 8 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*39);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 33 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            }
-
-            /* Inner loop uses 33 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*33);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 3da8a77..0000000
+++ /dev/null
@@ -1,914 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*120);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 102 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 102 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*102);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 8e1022f..0000000
+++ /dev/null
@@ -1,1818 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*351);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*297);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 73f6d16..0000000
+++ /dev/null
@@ -1,914 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*120);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 102 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
-            {
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            }
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 102 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*102);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 696fc38..0000000
+++ /dev/null
@@ -1,1818 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 351 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*351);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            }
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
-            {
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            }
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*297);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 7be622b..0000000
+++ /dev/null
@@ -1,683 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 70 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 70 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*70);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 57 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 57 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*57);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 6528299..0000000
+++ /dev/null
@@ -1,989 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 143 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 143 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*143);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- *
- * Force-only kernel. For each of the nlist->nri list entries it loads three
- * consecutive i atoms (suffixes 0,1,2) plus the shift vector of the entry, and
- * walks the j atoms of that entry two at a time (lanes A and B); a trailing odd
- * j atom is handled by a separate single-j branch.
- *
- * Per i-j pair:
- *   - i atom 0: reaction-field electrostatics plus tabulated dispersion and
- *     repulsion (table data from kernel_data->table_vdw),
- *   - i atoms 1 and 2: reaction-field electrostatics only.
- *
- * Inputs read:
- *   nlist        nri, iinr, jindex, jjnr, shift (gid is fetched but not used here)
- *   xx           coordinates, accessed as a flat real array through xx[0]
- *   fr           shift_vec, fshift, ntype, nbfp, ic->epsfac, ic->k_rf, ic->c_rf
- *   mdatoms      chargeA, typeA
- *   kernel_data  table_vdw->data, table_vdw->scale
- * Outputs written:
- *   ff           forces, accessed as a flat real array through ff[0]
- *   fr->fshift   handed to gmx_fjsp_update_iforce_3atom_swizzle_v2r8 together with
- *                the i force pointer (presumably accumulates shift forces - helper
- *                is not visible here)
- *   nrnb         flop accounting through inc_nrnb()
- *
- * NOTE(review): the i charges and the VdW type offset are taken once from
- * nlist->iinr[0] and reused for every list entry, and iinr[0] is read even when
- * nri is 0 - presumably callers guarantee a non-empty list of identical waters;
- * confirm against the caller.
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the two positions in the SIMD register.
-     *
-     * Many of the locals below are emitted by the kernel generator for every kernel
-     * flavour and are not referenced in this particular one.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    /* Lets the two table indices held in a SIMD value be read back as scalars */
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    /* Treat coordinates and forces as flat arrays of reals */
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters: charges (pre-multiplied by facel) and the
-     * VdW parameter row of atom 0 are taken from the first list entry only.
-     */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        /* Clear the i force accumulators for this list entry */
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop: two j atoms (A and B) per iteration */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            /* Each table point occupies 8 reals: dispersion at +0, repulsion at +4 */
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* Remainder: a single j atom is left when the j count of this entry is odd */
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            /* There is no B atom here. vdwjidx0B is only assigned in the paired loop
-             * above, so it is stale or - if that loop has never run - uninitialized.
-             * Alias it to A so the pair load below always reads a valid address; the
-             * second lane of fscal is replaced with zero before it is used (see the
-             * _fjsp_unpacklo_v2r8 calls below), so the duplicated parameters should
-             * not influence the forces.
-             */
-            vdwjidx0B        = vdwjidx0A;
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            /* Only vfconv.i[0] is used for table loads in this branch */
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Keep only the first lane; the second one has no j atom behind it */
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* End of innermost loop */
-
-        /* Hand the accumulated i forces to the helper together with the force and
-         * shift-force destinations for this list entry.
-         */
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*120);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index e085290..0000000
+++ /dev/null
@@ -1,1657 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 350 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 350 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*350);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 297 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*297);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index db417f2..0000000
+++ /dev/null
@@ -1,1097 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 167 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 167 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*167);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 144 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 144 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*144);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 45ab81a..0000000
+++ /dev/null
@@ -1,1777 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 377 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 377 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*377);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            CubicSplineTable
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 324 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            r00              = _fjsp_mul_v2r8(rsq00,rinv00);
-
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r00,vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            F                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            H                = _fjsp_setzero_v2r8();
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 324 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*324);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 1bb8b90..0000000
+++ /dev/null
@@ -1,549 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 47 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 47 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 9 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*47);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 37 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 37 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*37);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index ec86ec3..0000000
+++ /dev/null
@@ -1,855 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 120 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*120);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 100 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 100 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*100);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index b3087f6..0000000
+++ /dev/null
@@ -1,1523 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 327 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 327 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 20 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*327);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 277 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 277 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*277);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 81f75c1..0000000
+++ /dev/null
@@ -1,963 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 143 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 143 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*143);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-            vdwjidx0B        = 2*vdwtype[jnrB+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 123 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-            vdwjidx0A        = 2*vdwtype[jnrA+0];
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
-                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 123 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*123);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 5259348..0000000
+++ /dev/null
@@ -1,1643 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-        vvdwsum          = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 353 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 353 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 26 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*353);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            LennardJones
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    vdwjidx0A        = 2*vdwtype[inr+0];
-    c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
-    c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 303 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
-            fscal            = fvdw;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 303 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 24 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*303);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 74f8e5d..0000000
+++ /dev/null
@@ -1,484 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 35 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 35 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 8 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*35);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Particle-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-
-        /* Load parameters for i particles */
-        iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 30 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
-            /* Inner loop uses 30 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 7 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*30);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index b293a71..0000000
+++ /dev/null
@@ -1,790 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 108 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 108 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*108);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq00             = _fjsp_mul_v2r8(iq0,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*93);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 06b8779..0000000
+++ /dev/null
@@ -1,1466 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 315 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 315 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*315);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water3-Water3
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset0;
-    _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
-    _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
-    _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
-    jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    qq00             = _fjsp_mul_v2r8(iq0,jq0);
-    qq01             = _fjsp_mul_v2r8(iq0,jq1);
-    qq02             = _fjsp_mul_v2r8(iq0,jq2);
-    qq10             = _fjsp_mul_v2r8(iq1,jq0);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq20             = _fjsp_mul_v2r8(iq2,jq0);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
-        fix0             = _fjsp_setzero_v2r8();
-        fiy0             = _fjsp_setzero_v2r8();
-        fiz0             = _fjsp_setzero_v2r8();
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
-            /* Calculate displacement vector */
-            dx00             = _fjsp_sub_v2r8(ix0,jx0);
-            dy00             = _fjsp_sub_v2r8(iy0,jy0);
-            dz00             = _fjsp_sub_v2r8(iz0,jz0);
-            dx01             = _fjsp_sub_v2r8(ix0,jx1);
-            dy01             = _fjsp_sub_v2r8(iy0,jy1);
-            dz01             = _fjsp_sub_v2r8(iz0,jz1);
-            dx02             = _fjsp_sub_v2r8(ix0,jx2);
-            dy02             = _fjsp_sub_v2r8(iy0,jy2);
-            dz02             = _fjsp_sub_v2r8(iz0,jz2);
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-
-            /* Calculate squared distance and things based on it */
-            rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-            rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
-            rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
-            rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
-            rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
-            rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-
-            rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
-            rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
-            rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-            
-            fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-            
-            fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
-            fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
-            fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-            
-            fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*270);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index fabd5c9..0000000
+++ /dev/null
@@ -1,790 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 108 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 108 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*108);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Particle
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx0A,vdwjidx0B;
-    _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
-    _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
-    _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
-    _fjsp_v2r8       dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-
-            /* Calculate displacement vector */
-            dx10             = _fjsp_sub_v2r8(ix1,jx0);
-            dy10             = _fjsp_sub_v2r8(iy1,jy0);
-            dz10             = _fjsp_sub_v2r8(iz1,jz0);
-            dx20             = _fjsp_sub_v2r8(ix2,jx0);
-            dy20             = _fjsp_sub_v2r8(iy2,jy0);
-            dz20             = _fjsp_sub_v2r8(iz2,jz0);
-            dx30             = _fjsp_sub_v2r8(ix3,jx0);
-            dy30             = _fjsp_sub_v2r8(iy3,jy0);
-            dz30             = _fjsp_sub_v2r8(iz3,jz0);
-
-            /* Calculate squared distance and things based on it */
-            rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
-            rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-            rsq30            = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
-            rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
-            rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
-            rinv30           = gmx_fjsp_invsqrt_v2r8(rsq30);
-
-            rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
-            rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
-            rinvsq30         = _fjsp_mul_v2r8(rinv30,rinv30);
-
-            /* Load parameters for j particles */
-            jq0              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
-            fjx0             = _fjsp_setzero_v2r8();
-            fjy0             = _fjsp_setzero_v2r8();
-            fjz0             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq10             = _fjsp_mul_v2r8(iq1,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-            
-            fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq20             = _fjsp_mul_v2r8(iq2,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-            
-            fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* Compute parameters for interactions between i and j atoms */
-            qq30             = _fjsp_mul_v2r8(iq3,jq0);
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx30,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy30,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-            
-            fjx0             = _fjsp_madd_v2r8(dx30,fscal,fjx0);
-            fjy0             = _fjsp_madd_v2r8(dy30,fscal,fjy0);
-            fjz0             = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
-            /* Inner loop uses 93 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*93);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index d931e43..0000000
+++ /dev/null
@@ -1,1466 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Reset potential sums */
-        velecsum         = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 315 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 315 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 19 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*315);
-}
-/*
- * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction:            None
- * Geometry:                   Water4-Water4
- * Calculate force/pot:        Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    int              vdwioffset1;
-    _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
-    int              vdwioffset2;
-    _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
-    int              vdwioffset3;
-    _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
-    int              vdwjidx1A,vdwjidx1B;
-    _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
-    int              vdwjidx2A,vdwjidx2B;
-    _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
-    int              vdwjidx3A,vdwjidx3B;
-    _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
-    _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
-    _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
-    _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
-    _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
-    _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
-    _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
-    _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
-    _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
-    _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
-    iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-    iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
-    jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
-    jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
-    jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
-    qq11             = _fjsp_mul_v2r8(iq1,jq1);
-    qq12             = _fjsp_mul_v2r8(iq1,jq2);
-    qq13             = _fjsp_mul_v2r8(iq1,jq3);
-    qq21             = _fjsp_mul_v2r8(iq2,jq1);
-    qq22             = _fjsp_mul_v2r8(iq2,jq2);
-    qq23             = _fjsp_mul_v2r8(iq2,jq3);
-    qq31             = _fjsp_mul_v2r8(iq3,jq1);
-    qq32             = _fjsp_mul_v2r8(iq3,jq2);
-    qq33             = _fjsp_mul_v2r8(iq3,jq3);
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
-        fix1             = _fjsp_setzero_v2r8();
-        fiy1             = _fjsp_setzero_v2r8();
-        fiz1             = _fjsp_setzero_v2r8();
-        fix2             = _fjsp_setzero_v2r8();
-        fiy2             = _fjsp_setzero_v2r8();
-        fiz2             = _fjsp_setzero_v2r8();
-        fix3             = _fjsp_setzero_v2r8();
-        fiy3             = _fjsp_setzero_v2r8();
-        fiz3             = _fjsp_setzero_v2r8();
-
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            fscal            = felec;
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        if(jidx<j_index_end)
-        {
-
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
-            /* Calculate displacement vector */
-            dx11             = _fjsp_sub_v2r8(ix1,jx1);
-            dy11             = _fjsp_sub_v2r8(iy1,jy1);
-            dz11             = _fjsp_sub_v2r8(iz1,jz1);
-            dx12             = _fjsp_sub_v2r8(ix1,jx2);
-            dy12             = _fjsp_sub_v2r8(iy1,jy2);
-            dz12             = _fjsp_sub_v2r8(iz1,jz2);
-            dx13             = _fjsp_sub_v2r8(ix1,jx3);
-            dy13             = _fjsp_sub_v2r8(iy1,jy3);
-            dz13             = _fjsp_sub_v2r8(iz1,jz3);
-            dx21             = _fjsp_sub_v2r8(ix2,jx1);
-            dy21             = _fjsp_sub_v2r8(iy2,jy1);
-            dz21             = _fjsp_sub_v2r8(iz2,jz1);
-            dx22             = _fjsp_sub_v2r8(ix2,jx2);
-            dy22             = _fjsp_sub_v2r8(iy2,jy2);
-            dz22             = _fjsp_sub_v2r8(iz2,jz2);
-            dx23             = _fjsp_sub_v2r8(ix2,jx3);
-            dy23             = _fjsp_sub_v2r8(iy2,jy3);
-            dz23             = _fjsp_sub_v2r8(iz2,jz3);
-            dx31             = _fjsp_sub_v2r8(ix3,jx1);
-            dy31             = _fjsp_sub_v2r8(iy3,jy1);
-            dz31             = _fjsp_sub_v2r8(iz3,jz1);
-            dx32             = _fjsp_sub_v2r8(ix3,jx2);
-            dy32             = _fjsp_sub_v2r8(iy3,jy2);
-            dz32             = _fjsp_sub_v2r8(iz3,jz2);
-            dx33             = _fjsp_sub_v2r8(ix3,jx3);
-            dy33             = _fjsp_sub_v2r8(iy3,jy3);
-            dz33             = _fjsp_sub_v2r8(iz3,jz3);
-
-            /* Calculate squared distance and things based on it */
-            rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
-            rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
-            rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
-            rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
-            rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-            rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
-            rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
-            rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
-            rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
-            rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
-            rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
-            rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
-            rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
-            rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
-            rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
-            rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
-            rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
-            rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
-
-            rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
-            rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
-            rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
-            rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
-            rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
-            rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
-            rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
-            rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
-            rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
-
-            fjx1             = _fjsp_setzero_v2r8();
-            fjy1             = _fjsp_setzero_v2r8();
-            fjz1             = _fjsp_setzero_v2r8();
-            fjx2             = _fjsp_setzero_v2r8();
-            fjy2             = _fjsp_setzero_v2r8();
-            fjz2             = _fjsp_setzero_v2r8();
-            fjx3             = _fjsp_setzero_v2r8();
-            fjy3             = _fjsp_setzero_v2r8();
-            fjz3             = _fjsp_setzero_v2r8();
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-            
-            fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-            
-            fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
-            fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
-            fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-            
-            fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-            
-            fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-            
-            fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
-            fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
-            fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-            
-            fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-            
-            fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
-            fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
-            fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-            
-            fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
-            fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
-            fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
-            fscal            = felec;
-
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
-            /* Update vectorial force */
-            fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
-            fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
-            fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-            
-            fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
-            fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
-            fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
-            /* Inner loop uses 270 flops */
-        }
-
-        /* End of innermost loop */
-
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses 18 flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*270);
-}
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.cpp
deleted file mode 100644 (file)
index 75a64e8..0000000
+++ /dev/null
@@ -1,514 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
-
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-
-
-nb_kernel_info_t
-    kernellist_sparc64_hpc_ace_double[] =
-{
-    { nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "PotentialShift", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
-    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
-};
-
-int
-    kernellist_sparc64_hpc_ace_double_size = sizeof(kernellist_sparc64_hpc_ace_double)/sizeof(kernellist_sparc64_hpc_ace_double[0]);
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h
deleted file mode 100644 (file)
index 28c6462..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef nb_kernel_sparc64_hpc_ace_double_h
-#define nb_kernel_sparc64_hpc_ace_double_h
-
-#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
-
-/* List of kernels for this architecture with metadata about them */
-extern nb_kernel_info_t
-    kernellist_sparc64_hpc_ace_double[];
-
-/* Length of kernellist_c */
-extern int
-    kernellist_sparc64_hpc_ace_double_size;
-
-#endif
diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
deleted file mode 100644 (file)
index d47dbd3..0000000
+++ /dev/null
@@ -1,1042 +0,0 @@
-/* #if 0 */
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#error This file must be processed with the Gromacs pre-preprocessor
-/* #endif */
-/* #if INCLUDE_HEADER */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-/* #endif */
-
-/* ## List of variables set by the generating script:                                    */
-/* ##                                                                                    */
-/* ## Setttings that apply to the entire kernel:                                         */
-/* ## KERNEL_ELEC:           String, choice for electrostatic interactions               */
-/* ## KERNEL_VDW:            String, choice for van der Waals interactions               */
-/* ## KERNEL_NAME:           String, name of this kernel                                 */
-/* ## KERNEL_VF:             String telling if we calculate potential, force, or both    */
-/* ## GEOMETRY_I/GEOMETRY_J: String, name of each geometry, e.g. 'Water3' or '1Particle' */
-/* ##                                                                                    */
-/* ## Setttings that apply to particles in the outer (I) or inner (J) loops:             */
-/* ## PARTICLES_I[]/         Arrays with lists of i/j particles to use in kernel. It is  */
-/* ## PARTICLES_J[]:         just [0] for particle geometry, but can be longer for water */
-/* ## PARTICLES_ELEC_I[]/    Arrays with lists of i/j particle that have electrostatics  */
-/* ## PARTICLES_ELEC_J[]:    interactions that should be calculated in this kernel.      */
-/* ## PARTICLES_VDW_I[]/     Arrays with the list of i/j particle that have VdW          */
-/* ## PARTICLES_VDW_J[]:     interactions that should be calculated in this kernel.      */
-/* ##                                                                                    */
-/* ## Setttings for pairs of interactions (e.g. 2nd i particle against 1st j particle)   */
-/* ## PAIRS_IJ[]:            Array with (i,j) tuples of pairs for which interactions     */
-/* ##                        should be calculated in this kernel. Zero-charge particles  */
-/* ##                        do not have interactions with particles without vdw, and    */
-/* ##                        Vdw-only interactions are not evaluated in a no-vdw-kernel. */
-/* ## INTERACTION_FLAGS[][]: 2D matrix, dimension e.g. 3*3 for water-water interactions. */
-/* ##                        For each i-j pair, the element [I][J] is a list of strings  */
-/* ##                        defining properties/flags of this interaction. Examples     */
-/* ##                        include 'electrostatics'/'vdw' if that type of interaction  */
-/* ##                        should be evaluated, 'rsq'/'rinv'/'rinvsq' if those values  */
-/* ##                        are needed, and 'exactcutoff' or 'shift','switch' to        */
-/* ##                        decide if the force/potential should be modified. This way  */
-/* ##                        we only calculate values absolutely needed for each case.   */
-
-/* ## Calculate the size and offset for (merged/interleaved) table data */
-
-/*
- * Gromacs nonbonded kernel:   {KERNEL_NAME}
- * Electrostatics interaction: {KERNEL_ELEC}
- * VdW interaction:            {KERNEL_VDW}
- * Geometry:                   {GEOMETRY_I}-{GEOMETRY_J}
- * Calculate force/pot:        {KERNEL_VF}
- */
-void
-{KERNEL_NAME}
-                    (t_nblist                    * gmx_restrict       nlist,
-                     rvec                        * gmx_restrict          xx,
-                     rvec                        * gmx_restrict          ff,
-                     struct t_forcerec           * gmx_restrict          fr,
-                     t_mdatoms                   * gmx_restrict     mdatoms,
-                     nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
-                     t_nrnb                      * gmx_restrict        nrnb)
-{
-    /* ## Not all variables are used for all kernels, but any optimizing compiler fixes that, */
-    /* ## so there is no point in going to extremes to exclude variables that are not needed. */
-    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
-     * just 0 for non-waters.
-     * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
-     * jnr indices corresponding to data put in the four positions in the SIMD register.
-     */
-    int              i_shift_offset,i_coord_offset,outeriter,inneriter;
-    int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
-    int              jnrA,jnrB;
-    int              j_coord_offsetA,j_coord_offsetB;
-    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
-    real             rcutoff_scalar;
-    real             *shiftvec,*fshift,*x,*f;
-    _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
-    /* #for I in PARTICLES_I */
-    int              vdwioffset{I};
-    _fjsp_v2r8       ix{I},iy{I},iz{I},fix{I},fiy{I},fiz{I},iq{I},isai{I};
-    /* #endfor */
-    /* #for J in PARTICLES_J */
-    int              vdwjidx{J}A,vdwjidx{J}B;
-    _fjsp_v2r8       jx{J},jy{J},jz{J},fjx{J},fjy{J},fjz{J},jq{J},isaj{J};
-    /* #endfor */
-    /* #for I,J in PAIRS_IJ */
-    _fjsp_v2r8       dx{I}{J},dy{I}{J},dz{I}{J},rsq{I}{J},rinv{I}{J},rinvsq{I}{J},r{I}{J},qq{I}{J},c6_{I}{J},c12_{I}{J};
-    /* #endfor */
-    /* #if KERNEL_ELEC != 'None' */
-    _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
-    real             *charge;
-    /* #endif */
-    /* #if KERNEL_VDW != 'None' */
-    int              nvdwtype;
-    _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
-    int              *vdwtype;
-    real             *vdwparam;
-    _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
-    _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
-    /* #endif */
-    /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
-    _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
-    real             *vftab;
-    /* #endif */
-    /* #if 'LJEwald' in KERNEL_VDW */
-    /* #for I,J in PAIRS_IJ */
-    _fjsp_v2r8           c6grid_{I}{J};
-    /* #endfor */
-    real                 *vdwgridparam;
-    _fjsp_v2r8           ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
-    _fjsp_v2r8           one_half = gmx_fjsp_set1_v2r8(0.5);
-    _fjsp_v2r8           minus_one = gmx_fjsp_set1_v2r8(-1.0);
-    /* #endif */
-    /* #if 'Ewald' in KERNEL_ELEC */
-    _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
-    real             *ewtab;
-    /* #endif */
-    /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
-    _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
-    real             rswitch_scalar,d_scalar;
-    /* #endif */
-    _fjsp_v2r8       itab_tmp;
-    _fjsp_v2r8       dummy_mask,cutoff_mask;
-    _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
-    _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
-    union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
-    x                = xx[0];
-    f                = ff[0];
-
-    nri              = nlist->nri;
-    iinr             = nlist->iinr;
-    jindex           = nlist->jindex;
-    jjnr             = nlist->jjnr;
-    shiftidx         = nlist->shift;
-    gid              = nlist->gid;
-    shiftvec         = fr->shift_vec[0];
-    fshift           = fr->fshift[0];
-    /* #if KERNEL_ELEC != 'None' */
-    facel            = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
-    charge           = mdatoms->chargeA;
-    /*     #if 'ReactionField' in KERNEL_ELEC */
-    krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
-    krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
-    crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-    /*     #endif */
-    /* #endif */
-    /* #if KERNEL_VDW != 'None' */
-    nvdwtype         = fr->ntype;
-    vdwparam         = fr->nbfp;
-    vdwtype          = mdatoms->typeA;
-    /* #endif */
-    /* #if 'LJEwald' in KERNEL_VDW */
-    vdwgridparam     = fr->ljpme_c6grid;
-    sh_lj_ewald             = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
-    ewclj            = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
-    ewclj2           = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-    /* #endif */
-
-    /* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */
-    vftab            = kernel_data->table_elec_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-    /* #elif 'Table' in KERNEL_ELEC */
-    vftab            = kernel_data->table_elec->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-    /* #elif 'Table' in KERNEL_VDW */
-    vftab            = kernel_data->table_vdw->data;
-    vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-    /* #endif */
-
-    /* #if 'Ewald' in KERNEL_ELEC */
-    sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
-    /*     #if KERNEL_VF=='Force' and KERNEL_MOD_ELEC!='PotentialSwitch' */
-    ewtab            = fr->ic->tabq_coul_F;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-    /*     #else */
-    ewtab            = fr->ic->tabq_coul_FDV0;
-    ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
-    ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-     /*     #endif */
-    /* #endif */
-
-    /* #if 'Water' in GEOMETRY_I */
-    /* Setup water-specific parameters */
-    inr              = nlist->iinr[0];
-    /*     #for I in PARTICLES_ELEC_I */
-    iq{I}              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+{I}]));
-    /*     #endfor */
-    /*     #for I in PARTICLES_VDW_I */
-    vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
-    /*     #endfor */
-    /* #endif */
-
-    /* #if 'Water' in GEOMETRY_J */
-    /*     #for J in PARTICLES_ELEC_J */
-    jq{J}              = gmx_fjsp_set1_v2r8(charge[inr+{J}]);
-    /*     #endfor */
-    /*     #for J in PARTICLES_VDW_J */
-    vdwjidx{J}A        = 2*vdwtype[inr+{J}];
-    /*     #endfor */
-    /*     #for I,J in PAIRS_IJ */
-    /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
-    qq{I}{J}             = _fjsp_mul_v2r8(iq{I},jq{J});
-    /*         #endif */
-    /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
-    /*             #if 'LJEwald' in KERNEL_VDW */
-    c6_{I}{J}            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A]);
-    c12_{I}{J}           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A+1]);
-    c6grid_{I}{J}        = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset{I}+vdwjidx{J}A]);
-    /*             #else */
-    c6_{I}{J}            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A]);
-    c12_{I}{J}           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A+1]);
-    /*             #endif */
-    /*         #endif */
-    /*     #endfor */
-    /* #endif */
-
-    /* #if KERNEL_MOD_ELEC!='None' or KERNEL_MOD_VDW!='None' */
-    /*     #if KERNEL_ELEC!='None' */
-    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
-    rcutoff_scalar   = fr->ic->rcoulomb;
-    /*     #else */
-    rcutoff_scalar   = fr->ic->rvdw;
-    /*     #endif */
-    rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
-    rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
-    /* #endif */
-
-    /* #if KERNEL_MOD_VDW=='PotentialShift' */
-    sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
-    rvdw             = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-    /* #endif */
-
-    /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
-    /*     #if KERNEL_MOD_ELEC=='PotentialSwitch'  */
-    rswitch_scalar   = fr->ic->rcoulomb_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /*     #else */
-    rswitch_scalar   = fr->ic->rvdw_switch;
-    rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
-    /*     #endif */
-    /* Setup switch parameters */
-    d_scalar         = rcutoff_scalar-rswitch_scalar;
-    d                = gmx_fjsp_set1_v2r8(d_scalar);
-    swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
-    swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    /*     #if 'Force' in KERNEL_VF */
-    swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
-    swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
-    swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-    /*     #endif */
-    /* #endif */
-
-    /* Avoid stupid compiler warnings */
-    jnrA = jnrB = 0;
-    j_coord_offsetA = 0;
-    j_coord_offsetB = 0;
-
-    /* ## Keep track of the floating point operations we issue for reporting! */
-    /* #define OUTERFLOPS 0 */
-    outeriter        = 0;
-    inneriter        = 0;
-
-    /* Start outer loop over neighborlists */
-    for(iidx=0; iidx<nri; iidx++)
-    {
-        /* Load shift vector for this list */
-        i_shift_offset   = DIM*shiftidx[iidx];
-
-        /* Load limits for loop over neighbors */
-        j_index_start    = jindex[iidx];
-        j_index_end      = jindex[iidx+1];
-
-        /* Get outer coordinate index */
-        inr              = iinr[iidx];
-        i_coord_offset   = DIM*inr;
-
-        /* Load i particle coords and add shift vector */
-        /* #if GEOMETRY_I == 'Particle' */
-        gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-        /* #elif GEOMETRY_I == 'Water3' */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-        /* #elif GEOMETRY_I == 'Water4' */
-        /*     #if 0 in PARTICLES_I                 */
-        gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
-                                                 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-        /*     #else                                */
-        gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
-                                                 &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-        /*     #endif                               */
-        /* #endif                                   */
-
-        /* #if 'Force' in KERNEL_VF */
-        /*     #for I in PARTICLES_I */
-        fix{I}             = _fjsp_setzero_v2r8();
-        fiy{I}             = _fjsp_setzero_v2r8();
-        fiz{I}             = _fjsp_setzero_v2r8();
-        /*     #endfor */
-        /* #endif */
-
-        /* ## For water we already preloaded parameters at the start of the kernel */
-        /* #if not 'Water' in GEOMETRY_I */
-        /* Load parameters for i particles */
-        /*     #for I in PARTICLES_ELEC_I */
-        iq{I}              = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+{I}));
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endfor */
-        /*     #for I in PARTICLES_VDW_I */
-        vdwioffset{I}      = 2*nvdwtype*vdwtype[inr+{I}];
-        /*     #endfor */
-        /* #endif */
-
-        /* #if 'Potential' in KERNEL_VF */
-        /* Reset potential sums */
-        /*     #if KERNEL_ELEC != 'None' */
-        velecsum         = _fjsp_setzero_v2r8();
-        /*     #endif */
-        /*     #if KERNEL_VDW != 'None' */
-        vvdwsum          = _fjsp_setzero_v2r8();
-        /*     #endif */
-        /* #endif */
-
-        /* #for ROUND in ['Loop','Epilogue'] */
-
-        /* #if ROUND =='Loop' */
-        /* Start inner kernel loop */
-        for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
-        {
-        /* ## First round is normal loop (next statement resets indentation) */
-        /*     #if 0 */
-        }
-        /*     #endif */
-        /* #else */
-        if(jidx<j_index_end)
-        {
-        /* ## Second round is epilogue */
-        /* #endif */
-        /* #define INNERFLOPS 0 */
-
-            /* #if ROUND =='Loop' */
-            /* Get j neighbor index, and coordinate index */
-            jnrA             = jjnr[jidx];
-            jnrB             = jjnr[jidx+1];
-            j_coord_offsetA  = DIM*jnrA;
-            j_coord_offsetB  = DIM*jnrB;
-
-            /* load j atom coordinates */
-            /*     #if GEOMETRY_J == 'Particle'             */
-            gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0);
-            /*     #elif GEOMETRY_J == 'Water3'             */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-            /*     #elif GEOMETRY_J == 'Water4'             */
-            /*         #if 0 in PARTICLES_J                 */
-            gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-            /*         #else                                */
-            gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-            /*         #endif                               */
-            /*     #endif                                   */
-            /* #else */
-            jnrA             = jjnr[jidx];
-            j_coord_offsetA  = DIM*jnrA;
-
-            /* load j atom coordinates */
-            /*     #if GEOMETRY_J == 'Particle'             */
-            gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0);
-            /*     #elif GEOMETRY_J == 'Water3'             */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-            /*     #elif GEOMETRY_J == 'Water4'             */
-            /*         #if 0 in PARTICLES_J                 */
-            gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
-                                              &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
-                                              &jy2,&jz2,&jx3,&jy3,&jz3);
-            /*         #else                                */
-            gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
-                                              &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-            /*         #endif                               */
-            /*     #endif                                   */
-            /* #endif */
-
-            /* Calculate displacement vector */
-            /* #for I,J in PAIRS_IJ */
-            dx{I}{J}             = _fjsp_sub_v2r8(ix{I},jx{J});
-            dy{I}{J}             = _fjsp_sub_v2r8(iy{I},jy{J});
-            dz{I}{J}             = _fjsp_sub_v2r8(iz{I},jz{J});
-            /*     #define INNERFLOPS INNERFLOPS+3 */
-            /* #endfor */
-
-            /* Calculate squared distance and things based on it */
-            /* #for I,J in PAIRS_IJ */
-            rsq{I}{J}            = gmx_fjsp_calc_rsq_v2r8(dx{I}{J},dy{I}{J},dz{I}{J});
-            /*     #define INNERFLOPS INNERFLOPS+5 */
-            /* #endfor */
-
-            /* #for I,J in PAIRS_IJ */
-            /*     #if 'rinv' in INTERACTION_FLAGS[I][J] */
-            rinv{I}{J}           = gmx_fjsp_invsqrt_v2r8(rsq{I}{J});
-            /*         #define INNERFLOPS INNERFLOPS+5 */
-            /*     #endif */
-            /* #endfor */
-
-            /* #for I,J in PAIRS_IJ */
-            /*     #if 'rinvsq' in INTERACTION_FLAGS[I][J] */
-            /*         # if 'rinv' not in INTERACTION_FLAGS[I][J] */
-            rinvsq{I}{J}         = gmx_fjsp_inv_v2r8(rsq{I}{J});
-            /*             #define INNERFLOPS INNERFLOPS+4 */
-            /*         #else */
-            rinvsq{I}{J}         = _fjsp_mul_v2r8(rinv{I}{J},rinv{I}{J});
-            /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*         #endif */
-            /*     #endif */
-            /* #endfor */
-
-            /* #if not 'Water' in GEOMETRY_J */
-            /* Load parameters for j particles */
-            /*     #for J in PARTICLES_ELEC_J */
-            /*         #if ROUND =='Loop' */
-            jq{J}              = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+{J},charge+jnrB+{J});
-            /*         #else */
-            jq{J}              = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+{J});
-            /*         #endif */
-            /*     #endfor */
-            /*     #for J in PARTICLES_VDW_J */
-            vdwjidx{J}A        = 2*vdwtype[jnrA+{J}];
-            /*         #if ROUND =='Loop' */
-            vdwjidx{J}B        = 2*vdwtype[jnrB+{J}];
-            /*         #endif */
-            /*     #endfor */
-            /* #endif */
-
-            /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_I */
-            /*     #for J in PARTICLES_J */
-            fjx{J}             = _fjsp_setzero_v2r8();
-            fjy{J}             = _fjsp_setzero_v2r8();
-            fjz{J}             = _fjsp_setzero_v2r8();
-            /*     #endfor */
-            /* #endif */
-
-            /* #for I,J in PAIRS_IJ */
-
-            /**************************
-             * CALCULATE INTERACTIONS *
-             **************************/
-
-            /*     #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            /*         ## We always calculate rinv/rinvsq above to enable pipelineing in compilers (performance tested on x86) */
-            if (gmx_fjsp_any_lt_v2r8(rsq{I}{J},rcutoff2))
-            {
-                /*     #if 0    ## this and the next two lines is a hack to maintain auto-indentation in template file */
-            }
-            /*         #endif */
-            /*         #define INNERFLOPS INNERFLOPS+1 */
-            /*     #endif */
-
-            /*     #if 'r' in INTERACTION_FLAGS[I][J] */
-            r{I}{J}              = _fjsp_mul_v2r8(rsq{I}{J},rinv{I}{J});
-             /*         #define INNERFLOPS INNERFLOPS+1 */
-            /*     #endif */
-
-            /*     ## For water geometries we already loaded parameters at the start of the kernel */
-            /*     #if not 'Water' in GEOMETRY_J */
-            /* Compute parameters for interactions between i and j atoms */
-            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
-            qq{I}{J}             = _fjsp_mul_v2r8(iq{I},jq{J});
-            /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*         #endif */
-            /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
-            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,
-                                         vdwparam+vdwioffset{I}+vdwjidx{J}B,&c6_{I}{J},&c12_{I}{J});
-
-           /*             #if 'LJEwald' in KERNEL_VDW */
-            c6grid_{I}{J}       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A,
-                                                                   vdwgridparam+vdwioffset{I}+vdwjidx{J}B);
-            /*             #endif */
-            /*         #endif */
-            /*     #endif */
-
-            /*     #if 'table' in INTERACTION_FLAGS[I][J] */
-            /* Calculate table index by multiplying r with table scale and truncate to integer */
-            rt               = _fjsp_mul_v2r8(r{I}{J},vftabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(rt);
-            vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
-            twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
-            _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
-            /*         #define INNERFLOPS INNERFLOPS+4                          */
-            /*         #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW     */
-            /*             ## 3 tables, 4 data per point: multiply index by 12 */
-            vfconv.i[0]     *= 12;
-            vfconv.i[1]     *= 12;
-            /*         #elif 'Table' in KERNEL_ELEC                             */
-            /*             ## 1 table, 4 data per point: multiply index by 4   */
-            vfconv.i[0]     *= 4;
-            vfconv.i[1]     *= 4;
-            /*         #elif 'Table' in KERNEL_VDW                              */
-            /*             ## 2 tables, 4 data per point: multiply index by 8  */
-            vfconv.i[0]     *= 8;
-            vfconv.i[1]     *= 8;
-            /*         #endif                                                   */
-            /*     #endif */
-
-            /*     ## ELECTROSTATIC INTERACTIONS */
-            /*     #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
-
-            /*         #if KERNEL_ELEC=='Coulomb' */
-
-            /* COULOMB ELECTROSTATICS */
-            velec            = _fjsp_mul_v2r8(qq{I}{J},rinv{I}{J});
-            /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _fjsp_mul_v2r8(velec,rinvsq{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+2 */
-            /*             #endif */
-
-            /*         #elif KERNEL_ELEC=='ReactionField' */
-
-            /* REACTION-FIELD ELECTROSTATICS */
-            /*             #if 'Potential' in KERNEL_VF */
-            velec            = _fjsp_mul_v2r8(qq{I}{J},_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq{I}{J},rinv{I}{J}),crf));
-            /*                 #define INNERFLOPS INNERFLOPS+4 */
-            /*             #endif */
-            /*             #if 'Force' in KERNEL_VF */
-            felec            = _fjsp_mul_v2r8(qq{I}{J},_fjsp_msub_v2r8(rinv{I}{J},rinvsq{I}{J},krf2));
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-
-            /*         #elif KERNEL_ELEC=='Ewald' */
-            /* EWALD ELECTROSTATICS */
-
-            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
-            ewrt             = _fjsp_mul_v2r8(r{I}{J},ewtabscale);
-            itab_tmp         = _fjsp_dtox_v2r8(ewrt);
-            eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
-           _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
-            /*             #define INNERFLOPS INNERFLOPS+4 */
-            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_ELEC=='PotentialSwitch' */
-            ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
-            /*                 #if ROUND == 'Loop' */
-            ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
-            /*                 #else */
-            ewtabD           = _fjsp_setzero_v2r8();
-            /*                 #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
-            ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
-            /*                 #if ROUND == 'Loop' */
-            ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
-            /*                 #else */
-            ewtabFn          = _fjsp_setzero_v2r8();
-            /*                 #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
-            felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
-            /*                 #define INNERFLOPS INNERFLOPS+2 */
-            /*                 #if KERNEL_MOD_ELEC=='PotentialShift' */            
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq{I}{J},_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv{I}{J},sh_ewald),velec));
-            /*                     #define INNERFLOPS INNERFLOPS+7 */
-            /*                 #else */
-            velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
-            velec            = _fjsp_mul_v2r8(qq{I}{J},_fjsp_sub_v2r8(rinv{I}{J},velec));
-            /*                     #define INNERFLOPS INNERFLOPS+6 */
-            /*                 #endif */
-            /*                 #if 'Force' in KERNEL_VF */
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq{I}{J},rinv{I}{J}),_fjsp_sub_v2r8(rinvsq{I}{J},felec));
-            /*                      #define INNERFLOPS INNERFLOPS+3 */
-            /*                 #endif */
-            /*             #elif KERNEL_VF=='Force' */
-            /*                 #if ROUND == 'Loop' */
-            gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
-                                         &ewtabF,&ewtabFn);
-            /*                 #else */
-            gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
-            /*                 #endif */
-            felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
-            felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq{I}{J},rinv{I}{J}),_fjsp_sub_v2r8(rinvsq{I}{J},felec));
-            /*                 #define INNERFLOPS INNERFLOPS+7 */
-            /*             #endif */
-
-            /*         #elif KERNEL_ELEC=='CubicSplineTable' */
-
-            /* CUBIC SPLINE TABLE ELECTROSTATICS */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            /*             #if ROUND == 'Loop' */
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            /*             #else */
-            F                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
-            /*             #if ROUND == 'Loop' */
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
-            /*             #else */
-            H                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
-            /*             #define INNERFLOPS INNERFLOPS+4 */
-            /*             #if 'Potential' in KERNEL_VF */
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            velec            = _fjsp_mul_v2r8(qq{I}{J},VV);
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
-            felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq{I}{J},FF),_fjsp_mul_v2r8(vftabscale,rinv{I}{J})));
-            /*                 #define INNERFLOPS INNERFLOPS+7 */
-            /*             #endif */
-            /*         #endif */
-            /*         ## End of check for electrostatics interaction forms */
-            /*     #endif */
-            /*     ## END OF ELECTROSTATIC INTERACTION CHECK FOR PAIR I-J */
-
-            /*     #if 'vdw' in INTERACTION_FLAGS[I][J] */
-
-            /*         #if KERNEL_VDW=='LennardJones' */
-
-            /* LENNARD-JONES DISPERSION/REPULSION */
-
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq{I}{J},rinvsq{I}{J}),rinvsq{I}{J});
-            /*             #define INNERFLOPS INNERFLOPS+2 */
-            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
-            vvdw6            = _fjsp_mul_v2r8(c6_{I}{J},rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_{I}{J},_fjsp_mul_v2r8(rinvsix,rinvsix));
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_{I}{J},_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                                           _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_{I}{J},sh_vdw_invrcut6,vvdw6),one_sixth));
-            /*                     #define INNERFLOPS INNERFLOPS+8 */
-            /*                 #else */
-            vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
-            /*                     #define INNERFLOPS INNERFLOPS+3 */
-            /*                 #endif */
-            /*                 ## Check for force inside potential check, i.e. this means we already did the potential part */
-            /*                 #if 'Force' in KERNEL_VF */
-            fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq{I}{J});
-            /*                     #define INNERFLOPS INNERFLOPS+2 */
-            /*                 #endif */
-            /*             #elif KERNEL_VF=='Force' */
-            /*                 ## Force-only LennardJones makes it possible to save 1 flop (they do add up...) */
-            fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_{I}{J},rinvsix,c6_{I}{J}),_fjsp_mul_v2r8(rinvsix,rinvsq{I}{J}));
-            /*                 #define INNERFLOPS INNERFLOPS+4 */
-            /*             #endif */
-
-            /*         #elif KERNEL_VDW=='CubicSplineTable' */
-
-            /* CUBIC SPLINE TABLE DISPERSION */
-            /*             #if 'Table' in KERNEL_ELEC */
-            vfconv.i[0]       += 4;
-            vfconv.i[1]       += 4;
-            /*             #endif                     */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
-            /*             #if ROUND == 'Loop' */
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
-            /*             #else */
-            F                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
-            /*             #if ROUND == 'Loop' */
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
-            /*             #else */
-            H                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            /*             #define INNERFLOPS INNERFLOPS+4 */
-            /*             #if 'Potential' in KERNEL_VF */
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw6            = _fjsp_mul_v2r8(c6_{I}{J},VV);
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw6            = _fjsp_mul_v2r8(c6_{I}{J},FF);
-            /*                 #define INNERFLOPS INNERFLOPS+4 */
-            /*             #endif */
-
-            /* CUBIC SPLINE TABLE REPULSION */
-            Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
-            /*             #if ROUND == 'Loop' */
-            F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
-            /*             #else */
-            F                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
-            G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
-            /*             #if ROUND == 'Loop' */
-            H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
-            /*             #else */
-            H                = _fjsp_setzero_v2r8();
-            /*             #endif */
-            GMX_FJSP_TRANSPOSE2_V2R8(G,H);
-            Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
-            /*             #define INNERFLOPS INNERFLOPS+4 */
-            /*             #if 'Potential' in KERNEL_VF */
-            VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
-            vvdw12           = _fjsp_mul_v2r8(c12_{I}{J},VV);
-            /*                 #define INNERFLOPS INNERFLOPS+3 */
-            /*             #endif */
-            /*             #if 'Force' in KERNEL_VF */
-            FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
-            fvdw12           = _fjsp_mul_v2r8(c12_{I}{J},FF);
-            /*                 #define INNERFLOPS INNERFLOPS+5 */
-            /*             #endif */
-            /*             #if 'Potential' in KERNEL_VF */
-            vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
-            /*             #if 'Force' in KERNEL_VF */
-            fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv{I}{J})));
-            /*                 #define INNERFLOPS INNERFLOPS+4 */
-            /*             #endif */
-
-            /*         #elif KERNEL_VDW=='LJEwald' */
-
-            /* Analytical LJ-PME */
-            rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq{I}{J},rinvsq{I}{J}),rinvsq{I}{J});
-            ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq{I}{J});
-            ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(ewcljrsq);
-            /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
-           poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
-            /*                 #define INNERFLOPS INNERFLOPS+9 */
-            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
-            /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(poly,one),c6_{I}{J}),rinvsix);
-            vvdw12           = _fjsp_mul_v2r8(c12_{I}{J},_fjsp_mul_v2r8(rinvsix,rinvsix));
-            /*                 #define INNERFLOPS INNERFLOPS+5 */
-            /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
-            vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_{I}{J},_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
-                               _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_{I}{J},sh_lj_ewald,_fjsp_mul_v2r8(c6_{I}{J},sh_vdw_invrcut6))),one_sixth));
-            /*                     #define INNERFLOPS INNERFLOPS+7 */
-            /*                 #else */
-           vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
-            /*                 #define INNERFLOPS INNERFLOPS+2 */
-           /*                 #endif */
-            /*                  ## Check for force inside potential check, i.e. this means we already did the potential part */
-            /*                  #if 'Force' in KERNEL_VF */
-            /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
-           fvdw             = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_{I}{J},one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+6 */
-            /*                  #endif */
-            /*              #elif KERNEL_VF=='Force' */
-            /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(one,poly));
-            /* f6B = C6grid * exponent * beta^6 */
-            f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_{I}{J},one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
-            /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
-            fvdw              = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_{I}{J},rinvsix,_fjsp_sub_v2r8(c6_{I}{J},f6A)),rinvsix,f6B),rinvsq{I}{J});
-            /*                 #define INNERFLOPS INNERFLOPS+12 */
-            /*              #endif */
-            /*         #endif */
-            /*         ## End of check for vdw interaction forms */
-            /*     #endif */
-            /*     ## END OF VDW INTERACTION CHECK FOR PAIR I-J */
-
-            /*     #if 'switch' in INTERACTION_FLAGS[I][J] */
-            d                = _fjsp_sub_v2r8(r{I}{J},rswitch);
-            d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
-            d2               = _fjsp_mul_v2r8(d,d);
-            sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-            /*         #define INNERFLOPS INNERFLOPS+10 */
-
-            /*         #if 'Force' in KERNEL_VF */
-            dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-            /*             #define INNERFLOPS INNERFLOPS+5 */
-            /*         #endif */
-
-            /* Evaluate switch function */
-            /*         #if 'Force' in KERNEL_VF */
-            /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
-            /*             #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
-            felec            = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv{I}{J},_fjsp_mul_v2r8(velec,dsw)) );
-            /*                 #define INNERFLOPS INNERFLOPS+4 */
-            /*             #endif */
-            /*             #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
-            fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv{I}{J},_fjsp_mul_v2r8(vvdw,dsw)) );
-            /*                 #define INNERFLOPS INNERFLOPS+4 */
-            /*             #endif */
-            /*         #endif */
-            /*         #if 'Potential' in KERNEL_VF */
-            /*             #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
-            velec            = _fjsp_mul_v2r8(velec,sw);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
-            /*             #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
-            vvdw             = _fjsp_mul_v2r8(vvdw,sw);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif */
-            /*         #endif */
-            /*     #endif */
-            /*     #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            cutoff_mask      = _fjsp_cmplt_v2r8(rsq{I}{J},rcutoff2);
-            /*         #define INNERFLOPS INNERFLOPS+1 */
-            /*     #endif */
-
-            /*     #if 'Potential' in KERNEL_VF */
-            /* Update potential sum for this i atom from the interaction with this j atom. */
-            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            velec            = _fjsp_and_v2r8(velec,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
-            /*             #endif */
-            velecsum         = _fjsp_add_v2r8(velecsum,velec);
-            /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*         #endif */
-            /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-            /*             #if ROUND == 'Epilogue' */
-            vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
-            /*             #endif */
-            vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
-            /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*         #endif */
-            /*     #endif */
-
-            /*     #if 'Force' in KERNEL_VF */
-
-            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] and 'vdw' in INTERACTION_FLAGS[I][J] */
-            fscal            = _fjsp_add_v2r8(felec,fvdw);
-            /*             #define INNERFLOPS INNERFLOPS+1 */
-            /*         #elif 'electrostatics' in INTERACTION_FLAGS[I][J] */
-            fscal            = felec;
-            /*         #elif 'vdw' in INTERACTION_FLAGS[I][J] */
-            fscal            = fvdw;
-            /*        #endif */
-
-            /*             #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
-            /*                 #define INNERFLOPS INNERFLOPS+1 */
-            /*             #endif                                       */
-
-            /*             #if ROUND == 'Epilogue' */
-            fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-            /*             #endif */
-
-            /* ## Construction of vectorial force built into FMA instructions now */
-            /* #define INNERFLOPS INNERFLOPS+3      */
-            
-            /* Update vectorial force */
-            fix{I}             = _fjsp_madd_v2r8(dx{I}{J},fscal,fix{I});
-            fiy{I}             = _fjsp_madd_v2r8(dy{I}{J},fscal,fiy{I});
-            fiz{I}             = _fjsp_madd_v2r8(dz{I}{J},fscal,fiz{I});
-            /*             #define INNERFLOPS INNERFLOPS+6 */
-            
-            /* #if GEOMETRY_I == 'Particle'             */
-            /*     #if ROUND == 'Loop' */
-            gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx{I}{J},dy{I}{J},dz{I}{J});
-            /*     #else */
-            gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx{I}{J},dy{I}{J},dz{I}{J});
-            /*     #endif */
-            /*     #define INNERFLOPS INNERFLOPS+3      */
-            /* #else                                    */
-            fjx{J}             = _fjsp_madd_v2r8(dx{I}{J},fscal,fjx{J});
-            fjy{J}             = _fjsp_madd_v2r8(dy{I}{J},fscal,fjy{J});
-            fjz{J}             = _fjsp_madd_v2r8(dz{I}{J},fscal,fjz{J});
-            /*     #define INNERFLOPS INNERFLOPS+3      */
-            /* #endif                                   */
-
-            /*     #endif */
-
-            /*     #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
-            /*         #if 0    ## This and next two lines is a hack to maintain indentation in template file */
-            {
-                /*     #endif */
-            }
-            /*     #endif */
-            /*    ## End of check for the interaction being outside the cutoff */
-
-            /* #endfor */
-            /* ## End of loop over i-j interaction pairs */
-
-            /* #if 'Water' in GEOMETRY_I and GEOMETRY_J == 'Particle' */
-            /*     #if ROUND == 'Loop' */
-            gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-            /*     #else */
-            gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-            /*     #endif */
-            /*     #define INNERFLOPS INNERFLOPS+3      */
-            /* #elif GEOMETRY_J == 'Water3'             */
-            /*     #if ROUND == 'Loop' */
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-            /*     #else */
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-            /*     #endif */
-            /*     #define INNERFLOPS INNERFLOPS+9      */
-            /* #elif GEOMETRY_J == 'Water4'             */
-            /*     #if 0 in PARTICLES_J                 */
-            /*         #if ROUND == 'Loop' */
-            gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-            /*         #else */
-            gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-            /*         #endif */
-            /*         #define INNERFLOPS INNERFLOPS+12 */
-            /*     #else                                */
-            /*         #if ROUND == 'Loop' */
-            gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-            /*         #else */
-            gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-            /*         #endif */
-            /*         #define INNERFLOPS INNERFLOPS+9  */
-            /*     #endif                               */
-            /* #endif                                   */
-
-            /* Inner loop uses {INNERFLOPS} flops */
-        }
-
-        /* #endfor */
-
-        /* End of innermost loop */
-
-        /* #if 'Force' in KERNEL_VF */
-        /*     #if GEOMETRY_I == 'Particle'            */
-        gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-        /*         #define OUTERFLOPS OUTERFLOPS+6     */
-        /*     #elif GEOMETRY_I == 'Water3'            */
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-        /*         #define OUTERFLOPS OUTERFLOPS+18    */
-        /*     #elif GEOMETRY_I == 'Water4'            */
-        /*         #if 0 in PARTICLES_I                */
-        gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset,fshift+i_shift_offset);
-        /*             #define OUTERFLOPS OUTERFLOPS+24    */
-        /*         #else                               */
-        gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
-                                              f+i_coord_offset+DIM,fshift+i_shift_offset);
-        /*             #define OUTERFLOPS OUTERFLOPS+18    */
-        /*         #endif                              */
-        /*     #endif                                  */
-        /* #endif                                      */
-
-        /* #if 'Potential' in KERNEL_VF */
-        ggid                        = gid[iidx];
-        /* Update potential energies */
-        /*     #if KERNEL_ELEC != 'None' */
-        gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
-        /*     #if KERNEL_VDW != 'None' */
-        gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-        /*         #define OUTERFLOPS OUTERFLOPS+1 */
-        /*     #endif */
-        /* #endif */
-
-        /* Increment number of inner iterations */
-        inneriter                  += j_index_end - j_index_start;
-
-        /* Outer loop uses {OUTERFLOPS} flops */
-    }
-
-    /* Increment number of outer iterations */
-    outeriter        += nri;
-
-    /* Update outer/inner flops */
-    /* ## NB: This is not important, it just affects the flopcount. However, since our preprocessor is */
-    /* ## primitive and replaces aggressively even in strings inside these directives, we need to      */
-    /* ## assemble the main part of the name (containing KERNEL/ELEC/VDW) directly in the source.      */
-    /* #if GEOMETRY_I == 'Water3'            */
-    /*     #define ISUFFIX '_W3'             */
-    /* #elif GEOMETRY_I == 'Water4'          */
-    /*     #define ISUFFIX '_W4'             */
-    /* #else                                 */
-    /*     #define ISUFFIX ''                */
-    /* #endif                                */
-    /* #if GEOMETRY_J == 'Water3'            */
-    /*     #define JSUFFIX 'W3'              */
-    /* #elif GEOMETRY_J == 'Water4'          */
-    /*     #define JSUFFIX 'W4'              */
-    /* #else                                 */
-    /*     #define JSUFFIX ''                */
-    /* #endif                                */
-    /* #if 'PotentialAndForce' in KERNEL_VF  */
-    /*     #define VFSUFFIX  '_VF'           */
-    /* #elif 'Potential' in KERNEL_VF        */
-    /*     #define VFSUFFIX '_V'             */
-    /* #else                                 */
-    /*     #define VFSUFFIX '_F'             */
-    /* #endif                                */
-
-    /* #if KERNEL_ELEC != 'None' and KERNEL_VDW != 'None' */
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
-    /* #elif KERNEL_ELEC != 'None' */
-    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
-    /* #else */
-    inc_nrnb(nrnb,eNR_NBKERNEL_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
-    /* #endif  */
-}
index 0567e1d81244f74ac7d799cfb3f98d32845c145b..270f9ecf5977a1bf2d6b3871078f3fe2c9b9ec78 100644 (file)
 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
 #endif
 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
 #endif
-#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
-#    include "gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
-#endif
-
 
 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 static gmx_bool            nonbonded_setup_done  = FALSE;
 
 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 static gmx_bool            nonbonded_setup_done  = FALSE;
@@ -150,9 +146,6 @@ gmx_nonbonded_setup(t_forcerec *   fr,
 #endif
 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
                 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
 #endif
 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
                 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
-#endif
-#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
-                nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
 #endif
                 ; /* empty statement to avoid a completely empty block */
             }
 #endif
                 ; /* empty statement to avoid a completely empty block */
             }
@@ -215,10 +208,6 @@ gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwS
 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
         /* No padding - see comment above */
         { "sse4_1_double", 1 },
 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
         /* No padding - see comment above */
         { "sse4_1_double", 1 },
-#endif
-#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
-        /* No padding - see comment above */
-        { "sparc64_hpc_ace_double", 1 },
 #endif
         { "c", 1 },
     };
 #endif
         { "c", 1 },
     };